Python get_soupの例、globals.get_soup Pythonの例

コード例 #1

0

ファイルを表示

def run(url):
    """Scrape the listing page at ``url`` and insert one job per ``<h4>`` entry.

    Every ``<h4>`` is expected to wrap a link to a detail page; that page
    supplies the employment type and the posting date.

    A single ``Job`` object is mutated and re-submitted for every entry, so
    a field that is not overwritten for a listing (``full_or_part`` when
    neither job-type paragraph is found) keeps the previous listing's value.

    Args:
        url: Address of the page listing the open positions.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    job_type_markers = (
        ("Job Type: Full-time", 'Full-time'),
        ("Job Type: Part-time", 'Part-time'),
    )
    headings = get_soup(url).find_all('h4')
    job = Job(organization, "")
    job.organization_id = organization_id
    inserted = 0
    for heading in headings:
        link = heading.a
        job.title = link.text
        job.info_link = link['href']
        detail_body = get_soup(job.info_link).body

        # The first marker found wins; with no match the field is untouched.
        for marker, label in job_type_markers:
            if detail_body.find_all('p', string=marker):
                job.full_or_part = label
                break

        # Whitespace-separated tokens 2, 3 and 4 of the first "subtitle"
        # span are read as month name, day (minus its last character) and
        # year respectively.
        tokens = detail_body.find_all(
            'span', {'class': 'subtitle'})[0].text.split()
        month_name = tokens[2]
        day = int(tokens[3][:-1])
        year = int(tokens[4])
        job.post_date = datetime(year, month_to_num(month_name), day)

        inserted += job_insert(job)
    return inserted

コード例 #2

0

ファイルを表示

def run(url):
    """Scrape the ``job-result-table`` at ``url`` and insert each row as a job.

    Title, link and location come from the table row; employment type and
    posting date come from the linked detail page.

    Args:
        url: Address of the page containing the results table.

    Returns:
        The sum of the values returned by ``job_insert``.
    """

    def detail_text(page, item_class):
        # Text of the "secondary-text-color" div inside the <li> that
        # carries ``item_class``.
        item = page.find('li', {'class': item_class})
        return item.find('div', {'class': 'secondary-text-color'}).text

    results_table = get_soup(url).find('table', {'id': 'job-result-table'})
    job = Job(organization, "")
    job.post_date = ""
    job.organization_id = organization_id
    total = 0
    for row in results_table.find_all('tr', {'class': 'job-result'}):
        title_link = row.find('td', {'class': 'job-result-title-cell'}).a
        job.title = title_link.text.strip()
        # hrefs are prefixed with the site root to form the detail-page URL.
        job.info_link = ('https://pennylanecenters.jobs.net'
                         + title_link['href'])
        location_div = row.find('div', {'class': 'job-location-line'})
        job.location = clean_location(location_div.text)
        job.zip_code = city_to_zip(job.location)

        detail_page = get_soup(job.info_link)
        job.full_or_part = detail_text(detail_page, 'job-employee-type')
        job.post_date = string_to_date(
            detail_text(detail_page, 'job-date-posted'))
        total += job_insert(job)
    return total

コード例 #3

0

ファイルを表示

def run(url):
    """Scrape ``<h4>`` job entries at ``url`` and push each through ``update_db``.

    Results are handed over via attributes on the project's ``globals``
    module (title, link, summary, employment type, post date) rather than
    via a return value. ``full_or_part`` is only assigned when one of the
    two job-type paragraphs is found on the detail page.

    Args:
        url: Address of the page listing the open positions.
    """
    job_type_markers = (
        ("Job Type: Full-time", 'Full-time'),
        ("Job Type: Part-time", 'Part-time'),
    )
    for heading in get_soup(url).find_all('h4'):
        link = heading.a
        globals.job_title = link.text
        globals.info_link = link['href']
        # The summary field is filled with the link itself.
        globals.job_summary = globals.info_link
        detail_body = get_soup(globals.info_link).body

        for marker, label in job_type_markers:
            if detail_body.find_all('p', string=marker):
                globals.full_or_part = label
                break

        # Tokens 2, 3 and 4 of the first "subtitle" span are read as month
        # name, day (minus its last character) and year.
        tokens = detail_body.find_all(
            'span', {'class': 'subtitle'})[0].text.split()
        month_name = tokens[2]
        day = int(tokens[3][:-1])
        year = int(tokens[4])
        globals.job_post_date = datetime(year, month_to_num(month_name), day)

        update_db(organization)

コード例 #4

0

ファイルを表示

def run(url):
    """Scrape the ``<ul>`` job lists inside the ``post`` div at ``url``.

    All ``<ul>`` elements except the last are treated as job lists. The
    position of a list decides the employment type: first list is
    'Full-Time', second is 'Part-Time', any later one is 'On-Call'.
    Location and salary are taken from the detail page when it has a
    "Job Details" section; otherwise the shared ``Job`` object keeps
    whatever those fields held before.

    Args:
        url: Address of the page containing the job lists.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    post_div = get_soup(url).find('div', {'class': 'post'})
    groups = post_div.find_all('ul')[:-1]
    job = Job(organization, "")
    job.organization_id = organization_id
    total = 0
    labels = ('Full-Time', 'Part-Time')
    for position, group in enumerate(groups):
        employment = labels[position] if position < len(labels) else 'On-Call'
        for item in group.find_all('li'):
            job.full_or_part = employment
            anchor = item.a
            job.title = anchor.text
            job.info_link = anchor['href']
            details = get_soup(job.info_link).find(
                'div', {'aria-label': 'Job Details'})
            if details:
                job.location = details.find(
                    'span', {'aria-label': 'Job Location'}).text
                job.salary = details.find(
                    'span', {'aria-label': 'Salary Range'}).text
            total += job_insert(job)
    return total

コード例 #5

0

ファイルを表示

def run(url):
    """Scrape every page of ``views-row`` job listings and insert each job.

    Starts at ``url + initialPath`` and follows the "Go to next page" link
    until a page has none. A fresh ``Job`` is built per row; location,
    summary, salary and weekly hours are filled in only when the
    corresponding element is present.

    Fixes over the previous version:
      * the weekly-hours element is looked up for every job; it used to be
        looked up only when a salary element existed, which raised
        ``UnboundLocalError`` on the first job without a salary and reused a
        stale element on later ones;
      * a missing summary container no longer raises ``AttributeError``.

    Args:
        url: Site root; both the initial path and the next-page hrefs are
            appended to it.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    soup = get_soup(url + initialPath)
    insert_count = 0
    while soup:
        for html_element in soup.find_all('div', {'class': 'views-row'}):
            title = html_element.find('span', {
                'class': 'field-content'
            }).a.text
            job = Job(organization, title)
            job.organization_id = organization_id

            location_div = html_element.find(
                'div', {'class': 'views-field-field-job-city'})
            if location_div:
                job.location = location_div.find('span', {
                    'class': 'field-content'
                }).text

            summary_div = html_element.find(
                'div', {'class': 'views-field views-field-body-summary'})
            summary_span = (summary_div.span
                            if summary_div is not None else None)
            if summary_span is not None:
                job.summary = summary_span.text

            info_div = html_element.find('div', {'class': 'views-field-url'})
            job.info_link = info_div.find('span', {
                'class': 'field-content'
            }).a['href']
            info_soup = get_soup(job.info_link)

            salary_div = info_soup.find(
                'div', {'class': 'views-field-field-compensation-range'})
            if salary_div:
                job.salary = salary_div.find('span', {
                    'class': 'field-content'
                }).text

            # Looked up independently of the salary: the two fields are
            # separate elements on the detail page.
            hours_div = info_soup.find(
                'div', {'class': 'views-field-field-hours-week'})
            if hours_div:
                hours = hours_div.find('span', {'class': 'field-content'}).text
                job.full_or_part = hours + ' hours/week'

            insert_count += job_insert(job)

        # Follow pagination; a page without a next link ends the loop.
        next_page_button = soup.find('a', {'title': 'Go to next page'})
        if next_page_button:
            soup = get_soup(url + next_page_button['href'])
        else:
            soup = None
    return insert_count

コード例 #6

0

ファイルを表示

ファイル: jwch-institute-inc.py プロジェクト: nclairesays/jobs-for-hope

def run(url):
    """Scrape the ``lcp_catlist`` job list at ``url`` via ``update_db``.

    For each list item the title and link are stored on the project's
    ``globals`` module. The summary is the text of the grandparent of the
    first text node matching "Position Purpose:" on the detail page.

    Args:
        url: Address of the page containing the job list.

    Raises:
        globals.ParseError: If a detail page has no "Position Purpose:" text.
    """
    purpose_pattern = re.compile("Position Purpose:")
    listing = get_soup(url).find('ul', {'class': 'lcp_catlist'})

    for item in listing.find_all('li'):
        anchor = item.a
        globals.job_title = anchor.text.strip()
        globals.info_link = anchor['href']
        purpose = get_soup(globals.info_link).find(text=purpose_pattern)
        if purpose is None:
            raise globals.ParseError(globals.info_link,
                                     'Cannot find job summary')
        globals.job_summary = purpose.parent.parent.text
        update_db(organization)

コード例 #7

0

ファイルを表示

ファイル: mental_health_america_of_los_angeles.py プロジェクト: weisisheng/jobs-for-hope

def run(url):
    """Scrape the ``ReqRowClick`` table rows at ``url`` and insert each job.

    The page is loaded through ``get_javascript_soup_delayed_and_click``
    with the element id 'hrmSearchButton'. Title, link and city come from
    the row; the summary comes from the detail page, or is '' when no
    "Summary:" text node is found there.

    Args:
        url: Address of the job search page.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    soup = get_javascript_soup_delayed_and_click(url, 'hrmSearchButton')

    rows = soup.find_all('tr', {'class': 'ReqRowClick'})
    job = Job(organization, "")
    job.organization_id = organization_id
    total = 0
    for row in rows:
        title_cell = row.find('td', {'class': 'posTitle'})
        job.title = title_cell.text.strip()
        job.info_link = ('http://mhala.hrmdirect.com/employment/'
                         + title_cell.a['href'])
        job.location = row.find('td', {'class': 'cities'}).text
        job.zip_code = globals.city_to_zip(job.location)

        label = get_soup(job.info_link).find(string=["Summary:", "Summary: "])
        if not label:
            job.summary = ''
        else:
            holder = label.parent
            # clear() empties the element holding the label, so the label
            # is not part of the paragraph text read on the next line.
            holder.clear()
            job.summary = holder.find_parent("p").text.strip()
        total += job_insert(job)
    return total

コード例 #8

0

ファイルを表示

ファイル: people_assisting_the_homeless.py プロジェクト: michaelkma/jobs-for-hope

def run(url):
    """Scrape the first ``JobGrid-`` container at ``url`` and insert each job.

    Every ``<a>`` in the grid is one job. Its first inner ``<div>`` (inside
    the "row" div) is the title and its third is the location.

    The individual listing pages are deliberately not scraped: per the
    original author's note, their markup is too inconsistent, and an
    attempt to read employment type and salary from the ``<strong>`` tags
    broke on some listings.

    Args:
        url: Address of the page containing the job grid.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    grid = get_soup(url).select('div[class*="JobGrid-"]')[0]
    job = Job(organization, "")
    job.organization_id = organization_id
    total = 0
    for anchor in grid.find_all('a'):
        job.info_link = 'https://path.catsone.com' + anchor['href']
        cells = anchor.find('div', {'class': 'row'}).find_all('div')
        job.title = cells[0].text.strip()
        job.location = clean_location(cells[2].text.strip())
        job.zip_code = city_to_zip(job.location)
        total += job_insert(job)
    return total

コード例 #9

0

ファイルを表示

def run(url):
    """Scrape the ``list-data`` entries at ``url`` and insert each job.

    Everything is read from the listing page itself: title, link,
    employment type, location, a relative posting date and the first
    paragraph of the description.

    Args:
        url: Address of the page listing the open positions.

    Returns:
        The sum of the values returned by ``job_insert``.
    """

    def stripped_div(node, css_class):
        # Stripped text of the first <div> with ``css_class`` under ``node``.
        return node.find('div', {'class': css_class}).text.strip()

    entries = get_soup(url).find_all('div', {'class': 'list-data'})
    job = Job(organization, "")
    job.organization_id = organization_id
    total = 0
    for entry in entries:
        info = entry.find('div', {'class': 'job-info'})
        job.title = info.find('span', {'class': 'job-title'}).text.strip()
        job.info_link = info.h4.a['href']
        job.full_or_part = stripped_div(entry, 'job-type')
        job.location = clean_location(stripped_div(entry, 'job-location'))
        job.zip_code = city_to_zip(job.location)

        # Space-separated tokens 1 and 2 of the date text are passed to
        # date_ago as an integer amount and a unit.
        age = stripped_div(entry, 'job-date').split(' ')
        job.post_date = date_ago(int(age[1]), age[2])

        description = entry.find('div', {'class': 'job-description'})
        job.summary = description.p.text.strip()
        total += job_insert(job)
    return total

コード例 #10

0

ファイルを表示

def run(url):
    """Scrape ``job-listing-job-item`` entries at ``url`` and insert each job.

    The listing page (loaded via ``get_javascript_soup``) gives the title,
    link and posting date. The detail page gives the location and, when
    the location link's href ends in a numeric '+'-separated token, the
    zip code.

    Args:
        url: Address of the job listing page.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    listings = get_javascript_soup(url).find_all(
        'div', {'class': 'job-listing-job-item'})
    job = Job(organization, "")
    job.organization_id = organization_id
    total = 0
    for listing in listings:
        title_link = listing.find('span', {'class': 'job-item-title'}).a
        job.title = title_link.text.strip()
        job.info_link = ('https://recruiting.paylocity.com'
                         + title_link['href'])

        location_link = get_soup(job.info_link).find(
            'div', {'class': 'preview-location'}).a
        if not location_link:
            job.location = ''
            job.zip_code = ''
        else:
            job.location = location_link.text
            zip_token = location_link['href'].split('+')[-1]
            try:
                job.zip_code = int(zip_token)
            except ValueError:
                # Last href token is not numeric: derive a zip code from
                # the location text instead.
                job.zip_code = city_to_zip(job.location)

        # The posting date is the part before ' - ' in the second <span>
        # of the title column.
        date_span = listing.find(
            'div', {'class': 'job-title-column'}).find_all('span')[1]
        job.post_date = string_to_date(date_span.text.split(' - ')[0])
        total += job_insert(job)
    return total

コード例 #11

0

ファイルを表示

ファイル: the_village_family_services.py プロジェクト: weisisheng/jobs-for-hope

def run(url):
    """Scrape the ``wpjb-job-list`` grid at ``url`` and insert each job.

    Title, link, employment type, location and creation date all come from
    the listing page. Only a month and a day are read from the date text;
    the year is inferred from the current date.

    Args:
        url: Address of the page containing the job grid.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    grid = get_soup(url).find('div', {'class': 'wpjb-job-list'})
    job = Job(organization, "")
    job.organization_id = organization_id
    total = 0
    for entry in grid.find_all('div', {'class': 'wpjb-col-main'}):
        headline = entry.find('div', {'class': 'wpjb-line-major'})
        link = headline.a
        job.title = link.text
        job.info_link = link['href']
        job.full_or_part = headline.find(
            'span', {'class': 'wpjb-sub-title'}).text.strip()

        details = entry.find('div', {'class': 'wpjb-line-minor'})
        job.location = details.find(
            'span', {'class': 'wpjb-job_location'}).text.strip()
        created = details.find(
            'span', {'class': 'wpjb-job_created_at'}).text.strip()
        date_parts = created.split(', ')
        month = month_to_num(date_parts[0])
        day = int(date_parts[1])

        # A month later than the current one is taken to belong to the
        # previous year; anything else to the current year.
        year = (datetime.now().year if month <= datetime.now().month
                else datetime.now().year - 1)
        job.post_date = datetime(year, month, day)
        total += job_insert(job)
    return total

コード例 #12

0

ファイルを表示

ファイル: penny_lane_centers.py プロジェクト: nclairesays/jobs-for-hope

def run(url):
    """Scrape the ``job-result-table`` at ``url`` and submit rows via ``update_db``.

    Each row's fields are written to attributes of the project's
    ``globals`` module, ``update_db`` is called, and ``reset_vars`` is
    invoked before the next row.

    Args:
        url: Address of the page containing the results table.
    """

    def detail_text(page, item_class):
        # Text of the "secondary-text-color" div inside the <li> that
        # carries ``item_class``.
        item = page.find('li', {'class': item_class})
        return item.find('div', {'class': 'secondary-text-color'}).text

    globals.job_post_date = ''
    results_table = get_soup(url).find('table', {'id': 'job-result-table'})

    for row in results_table.find_all('tr', {'class': 'job-result'}):
        title_link = row.find('td', {'class': 'job-result-title-cell'}).a
        globals.job_title = title_link.text.strip()
        globals.info_link = ('https://pennylanecenters.jobs.net'
                             + title_link['href'])
        # The summary field is filled with the link itself.
        globals.job_summary = globals.info_link
        location_div = row.find('div', {'class': 'job-location-line'})
        globals.job_location = clean_location(location_div.text)
        globals.job_zip_code = city_to_zip(globals.job_location)

        detail_page = get_soup(globals.info_link)
        globals.full_or_part = detail_text(detail_page, 'job-employee-type')
        globals.job_post_date = string_to_date(
            detail_text(detail_page, 'job-date-posted'))
        update_db(organization)
        reset_vars()

コード例 #13

0

ファイルを表示

ファイル: 211-la-county.py プロジェクト: nclairesays/jobs-for-hope

def run(url):
    """Scrape the ``jobBtn`` divs on the 211 LA careers page via ``update_db``.

    ``update_db`` is called once per ``jobBtn`` div, after all of its links
    have been visited, so when a div holds several links only the last
    one's title and href are on ``globals`` at that point.

    Args:
        url: Not used; the careers page address is hard-coded.
    """
    careers_page = get_soup("https://www.211la.org/careers")
    buttons = careers_page.find_all("div", {"class": "jobBtn"})

    for button in buttons:
        for anchor in button.find_all("a"):
            globals.job_title = anchor.text
            globals.info_link = anchor.get('href')
        update_db(organization)

コード例 #14

0

ファイルを表示

ファイル: jwch_institute_inc.py プロジェクト: weisisheng/jobs-for-hope

def run(url):
    """Scrape the ``lcp_catlist`` job list at ``url`` and insert each job.

    The summary is the text of the grandparent of the first text node
    matching "Position Purpose:" on the job's detail page.

    Args:
        url: Address of the page containing the job list.

    Returns:
        The sum of the values returned by ``job_insert``.

    Raises:
        globals.ParseError: If a detail page has no "Position Purpose:" text.
    """
    purpose_pattern = re.compile("Position Purpose:")
    listing = get_soup(url).find('ul', {'class': 'lcp_catlist'})
    job = Job(organization, "")
    job.organization_id = organization_id
    total = 0
    for item in listing.find_all('li'):
        anchor = item.a
        job.title = anchor.text.strip()
        job.info_link = anchor['href']
        purpose = get_soup(job.info_link).find(text=purpose_pattern)
        if purpose is None:
            raise globals.ParseError(job.info_link,
                                     'Cannot find job summary')
        job.summary = purpose.parent.parent.text
        total += job_insert(job)
    return total

コード例 #15

0

ファイルを表示

def run(url):
    """Scrape job links from the first ``sqs-block-content`` div at ``url``.

    Every ``<p>`` in that div except the first four and the last three is
    treated as a job entry. Its link text and href are written to the
    project's ``globals`` module before ``update_db`` is called.

    Args:
        url: Address of the careers page.
    """
    content = get_soup(url).find('div', {'class': 'sqs-block-content'})
    paragraphs = content.find_all('p')

    # Skip the first four and last three paragraphs of the block.
    for paragraph in paragraphs[4:-3]:
        anchor = paragraph.a
        globals.job_title = anchor.text.strip()
        globals.info_link = 'https://lafh.org' + anchor['href']
        update_db(organization)

コード例 #16

0

ファイルを表示

ファイル: brilliant_corners.py プロジェクト: weisisheng/jobs-for-hope

def run(url):
    """Scrape the ``js-job-container`` entries at ``url`` and insert each job.

    Title, link and location come from the listing page. The employment
    type is the third ' | '-separated field of the detail page's
    ``js-subtitle`` heading.

    Fix: the span lookups used set literals (``{"class", "js-job-title"}``)
    where an attribute dict was intended. Beautiful Soup interprets a
    non-dict ``attrs`` value as a CSS-class filter rather than an attribute
    map, so those lookups matched only by accident. They are now proper
    ``{"class": ...}`` dicts, matching the other lookups in this function.

    Args:
        url: Address of the job listing page.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    soup = get_soup(url)
    jobs_list = soup.find_all("div", {"class": "js-job-container"})
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for job_entry in jobs_list:
        # Look the title link up once; it supplies both text and href.
        title_link = job_entry.find("span", {"class": "js-job-title"}).a
        job_class.title = title_link.text
        job_class.info_link = ('https://careers.jobscore.com'
                               + title_link['href'])
        job_class.location = job_entry.find(
            "span", {"class": "js-job-location"}).text.strip()

        job_soup = get_soup(job_class.info_link)
        job_class.full_or_part = job_soup.find(
            "h2", {"class": "js-subtitle"}).text.split(' | ')[2]
        insert_count += job_insert(job_class)
    return insert_count

コード例 #17

0

ファイルを表示

ファイル: coalition_for_responsible_community_development.py プロジェクト: weisisheng/jobs-for-hope

def run(url):
    """Scrape the ``et_pb_toggle`` sections at ``url`` and insert each job.

    Each toggle's ``<h5>`` text is the job title. No per-job page is
    visited, so every job links back to the listing page itself.

    Fix: the link is now stored on ``info_link``, the attribute the other
    scrapers in this file populate on ``Job``. It was previously assigned
    to ``link``, which nothing else in this file sets.

    Args:
        url: Address of the listing page; also used as every job's link.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    soup = get_soup(url)
    jobs_list = soup.find_all('div', {'class': 'et_pb_toggle'})
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for job_entry in jobs_list:
        job_class.title = job_entry.find('h5').text.strip()
        job_class.info_link = url
        insert_count += job_insert(job_class)
    return insert_count

コード例 #18

0

ファイルを表示

ファイル: share_the_self_help_and_recovery_exchange.py プロジェクト: fyliu/jobs-for-hope

def run(url):
    """Scrape the ``<h4>`` job headings at ``url`` and insert each job.

    Args:
        url: Address of the page listing the open positions.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    page = get_soup(url)
    job = Job(organization, "")
    job.organization_id = organization_id
    total = 0
    for heading in page.find_all('h4'):
        anchor = heading.a
        job.title = anchor.text
        job.info_link = anchor['href']
        # Location is the piece of the heading's <span> text between the
        # first ']' and the next one (or the end of the text).
        job.location = heading.span.text.split(']')[1]
        total += job_insert(job)
    return total

コード例 #19

0

ファイルを表示

def run(url):
    """Insert one job per link found next to the "Careers" heading at ``url``.

    The search scope is the parent element of the ``<h1>`` whose text is
    exactly 'Careers'. Every ``<a>`` beneath it is treated as a job.

    Args:
        url: Address of the careers page.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    careers_section = get_soup(url).find('h1', text='Careers').parent
    job = Job(organization, "")
    job.organization_id = organization_id
    total = 0
    for anchor in careers_section.find_all('a'):
        job.title = anchor.text
        job.info_link = anchor['href']
        total += job_insert(job)
    return total

コード例 #20

0

ファイルを表示

def run(url):
    """Scrape the ``display-posts-listing`` list at ``url`` and insert each job.

    The title is the full text of the ``<li>``; the link is the href of
    its first ``<a>``.

    Args:
        url: Address of the page containing the list.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    page = get_soup(url)

    posts = page.find('ul', {'class': 'display-posts-listing'})
    job = Job(organization, "")
    job.organization_id = organization_id
    total = 0
    for item in posts.find_all('li'):
        job.title = item.text
        job.info_link = item.a['href']
        total += job_insert(job)
    return total

コード例 #21

0

ファイルを表示

ファイル: weingart_center_association.py プロジェクト: michaelkma/jobs-for-hope

def run(url):
    """Insert one job per link found around the "Current Openings:" text.

    The search scope is the great-grandparent of the text node that reads
    exactly 'Current Openings:'. Every ``<a>`` beneath it is treated as a
    job.

    Args:
        url: Address of the careers page.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    page = get_soup(url)

    heading_text = page.find(text='Current Openings:')
    openings = heading_text.parent.parent.parent
    job = Job(organization, "")
    job.organization_id = organization_id
    total = 0
    for anchor in openings.find_all('a'):
        job.title = anchor.text
        job.info_link = anchor['href']
        total += job_insert(job)
    return total

コード例 #22

0

ファイルを表示

def run(url):
    """Scrape the ``jobBtn`` divs on the 211 LA careers page and insert jobs.

    ``job_insert`` is called once per ``jobBtn`` div, after all of its
    links have been visited, so only the last link's title and href are
    inserted for a div. A div without links re-inserts whatever the shared
    ``Job`` object already holds.

    Args:
        url: Not used; the careers page address is hard-coded.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    careers_page = get_soup("https://www.211la.org/careers")
    buttons = careers_page.find_all("div", {"class": "jobBtn"})
    job = Job(organization, "")
    job.organization_id = organization_id
    total = 0
    for button in buttons:
        for anchor in button.find_all("a"):
            job.title = anchor.text
            job.info_link = anchor.get('href')
        total += job_insert(job)
    return total

コード例 #23

0

ファイルを表示

def run(url):
    """Scrape the rows of the first ``<tbody>`` at ``url`` and insert each job.

    The first cell's link gives the title and href; the third cell's text
    is the location.

    Args:
        url: Address of the page containing the jobs table.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    table_body = get_soup(url).find('tbody')
    job = Job(organization, "")
    job.organization_id = organization_id
    total = 0
    for row in table_body.find_all('tr'):
        cells = row.find_all('td')
        title_link = cells[0].find('a')
        job.title = title_link.text
        job.info_link = title_link['href']
        job.location = cells[2].text
        total += job_insert(job)
    return total

コード例 #24

0

ファイルを表示

ファイル: people-assisting-the-homeless.py プロジェクト: nclairesays/jobs-for-hope

def run(url):
    """Scrape the first ``JobGrid-`` container at ``url`` via ``update_db``.

    Every ``<a>`` in the grid is one job. Its first inner ``<div>`` (inside
    the "row" div) is the title and its third is the location. Values are
    written to the project's ``globals`` module before each ``update_db``
    call.

    Args:
        url: Address of the page containing the job grid.
    """
    page = get_soup(url)

    grid = page.select('div[class*="JobGrid-"]')[0]

    for anchor in grid.find_all('a'):
        globals.info_link = 'https://path.catsone.com' + anchor['href']
        cells = anchor.find('div', {'class': 'row'}).find_all('div')
        globals.job_title = cells[0].text.strip()
        globals.job_location = clean_location(cells[2].text.strip())
        globals.job_zip_code = city_to_zip(globals.job_location)
        update_db(organization)

コード例 #25

0

ファイルを表示

def run(url):
    """Insert one job per linked paragraph in a specific container at ``url``.

    The container is located by a hard-coded element id. Paragraphs
    without any ``<a>`` are skipped; for the rest, the first link supplies
    the title and href.

    Args:
        url: Address of the careers page.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    container = get_soup(url).find(
        'div', {'id': 'yui_3_16_0_ym19_1_1492463820306_5454'})
    job = Job(organization, "")
    job.organization_id = organization_id
    total = 0
    for paragraph in container.find_all('p'):
        links = paragraph.find_all('a')
        if not links:
            continue
        first_link = links[0]
        job.title = first_link.text
        job.info_link = first_link['href']
        total += job_insert(job)
    return total

コード例 #26

0

ファイルを表示

ファイル: neighborhood_legal_services_of_los_angeles_county.py プロジェクト: weisisheng/jobs-for-hope

def run(url):
    """Insert one job per non-empty link inside the first ``<article>``.

    Links whose stripped text is empty are skipped. Each href is appended
    to ``url`` to build the job link.

    Args:
        url: Address of the careers page; also the prefix for job links.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    page = get_soup(url)

    anchors = page.find('article').find_all('a')
    job = Job(organization, "")
    job.organization_id = organization_id
    total = 0
    for anchor in anchors:
        label = anchor.text.strip()
        if not label:
            continue
        job.title = label
        job.info_link = url + anchor['href']
        total += job_insert(job)
    return total

コード例 #27

0

ファイルを表示

def run(url):
    """Scrape the ``jobLink[]`` openings at ``url`` and insert the CA jobs.

    Each opening's detail page supplies title, summary, location, job
    type, optional salary and a relative "posted ago" string. An opening
    whose location has a non-empty last comma-separated part other than
    'CA' is skipped.

    Fix: a trailing '+' on the posted-ago amount (e.g. "30+") is now
    removed with ``[:-1]``. The previous ``[:1]`` kept only the first
    character, turning "30+" into 3.

    Args:
        url: Address of the job listing page (loaded via
            ``get_javascript_soup``).

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    soup = get_javascript_soup(url)

    current_openings = soup.find_all(attrs={"data-tn-element": "jobLink[]"})
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for current_opening in current_openings:
        detail_page_link = current_opening.find('a')['href']
        detail_page_soup = get_soup(detail_page_link)
        detail_page_desc = detail_page_soup.find(
            'div', {"data-tn-component": "jobDescription"})

        job_class.title = detail_page_desc.find('h1').text.strip()

        # The first and last <p>/<li> are excluded from the summary; the
        # last one is parsed below as the "posted ago" line.
        job_summary_parts = detail_page_desc.find_all(['p', 'li'])
        job_class.summary = ' '.join(
            part.get_text() for part in job_summary_parts[1:-1]).strip()

        job_class.location = detail_page_desc.find(
            'dt', string="Location").find_next().get_text()

        location_parts = job_class.location.split(',')
        if (len(location_parts) > 1 and len(location_parts[-1])
                and location_parts[-1].strip().lower() != 'ca'):
            # skip job if state is not CA
            print('Skip location: %s' % job_class.location)
            continue
        job_class.zip_code = city_to_zip(location_parts[0])

        # Space-separated tokens 1 and 2 are the amount and the unit.
        posted_ago = job_summary_parts[-1].get_text().split(' ')
        length = posted_ago[1]
        if length.endswith('+'):
            length = length[:-1]
        job_class.post_date = date_ago(int(length), posted_ago[2])

        job_class.full_or_part = detail_page_desc.find(
            'dt', string="Job Type").find_next().get_text()

        salary_search = detail_page_desc.find('dt', string="Salary")
        if salary_search is not None:
            job_class.salary = salary_search.find_next().get_text()

        job_class.info_link = detail_page_link

        insert_count += job_insert(job_class)
    return insert_count

コード例 #28

0

ファイルを表示

ファイル: national_health_foundation.py プロジェクト: weisisheng/jobs-for-hope

def run(url):
    """Insert one job per link inside a specific container div at ``url``.

    The container is located by a hard-coded CSS class. Each ``<a>``
    beneath it supplies a job's title (stripped link text) and href.

    Args:
        url: Address of the careers page.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    page = get_soup(url)

    container = page.find(
        'div', {'class': 'tf-sh-78847e2ef97967b68fdec32a2997ab8f'})
    job = Job(organization, "")
    job.organization_id = organization_id
    total = 0
    for anchor in container.find_all('a'):
        job.title = anchor.text.strip()
        job.info_link = anchor['href']
        total += job_insert(job)
    return total

コード例 #29

0

ファイルを表示

ファイル: a_community_of_friends.py プロジェクト: weisisheng/jobs-for-hope

def run(url):
    """Scrape ``job-listing-job-item`` entries at ``url`` and insert each job.

    The listing page (loaded via ``get_javascript_soup``) gives title,
    link, posting date and location. The first three ``<p>`` elements of
    the detail page are inspected for a zip code, employment status and
    salary.

    Fixes over the previous version:
      * ``zip_code`` is cleared for every listing. The ``Job`` object is
        shared across iterations, so a zip code found for one listing used
        to suppress the ``city_to_zip`` fallback for every later listing
        that had none.
      * Detail pages with fewer than three paragraphs, or a "Location"
        paragraph without a colon, no longer raise ``IndexError``; the
        missing pieces are treated as absent.

    Args:
        url: Address of the job listing page.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    soup = get_javascript_soup(url)
    job_listings = soup.find_all('div', {'class': 'job-listing-job-item'})
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for job_listing in job_listings:
        job_description = job_listing.find_all('span')
        # Get job title and link
        title_link = job_description[0].a
        job_class.title = title_link.text
        job_class.info_link = ('https://recruiting.paylocity.com'
                               + title_link['href'])

        # The date text may end in " - "; drop those three characters
        # before splitting the month/day/year on '/'.
        date = job_description[1].text
        if date[-2:-1] == '-':
            date = date[:-3]
        date_parts = date.strip().split('/')
        month = int(date_parts[0])
        day = int(date_parts[1])
        year = int(date_parts[2])
        job_class.post_date = datetime(year, month, day)

        # Get Location
        job_class.location = job_listing.find('div', {
            'class': 'location-column'
        }).span.text

        # Get soup of job listing to scrape more info
        listing_soup = get_soup(job_class.info_link)
        listing_body = listing_soup.find('body').find_all('p')
        # Pad to three entries so a short page reads as "info not present".
        paragraph_texts = [p.text for p in listing_body[:3]]
        paragraph_texts.extend([''] * (3 - len(paragraph_texts)))
        location_text, status_text, salary_text = paragraph_texts

        # Start from an empty zip so the previous listing's value cannot
        # leak into this one.
        job_class.zip_code = ''
        if 'Location' in location_text:
            location_string = location_text.partition(':')[2].lstrip()
            zip_code_result = re.search(r'(\d{5})', location_string)
            if zip_code_result is not None:
                job_class.zip_code = zip_code_result.group(1)
            # can't get city since there's no standard. It could be
            # "Hollywood", "Koreatown, Los angeles, California", or even
            # "Multiple Locations"
        if not job_class.zip_code:
            job_class.zip_code = globals.city_to_zip(job_class.location)

        # Retrieve Full/Part-time and Salary info if available. The slice
        # offsets skip a fixed-length prefix (presumably "Status: " and
        # "Salary Range: " — confirm against a live page).
        if 'Status' in status_text:
            job_class.full_or_part = status_text[8:]
        if 'Salary' in salary_text:
            job_class.salary = salary_text[14:]
        insert_count += job_insert(job_class)
    return insert_count

コード例 #30

0

ファイルを表示

ファイル: covenant_house.py プロジェクト: weisisheng/jobs-for-hope

def run(url):
    """Scrape the ``reqitem`` rows at ``url`` and insert the Los Angeles jobs.

    The title and link come from the row's ``posTitle`` cell. A row is
    inserted only when the text of its ``cities`` cell is exactly
    "Los Angeles".

    Fix: ``job_location`` is reset for every row. It used to be assigned
    only inside the ``cities`` loop, so a row without that cell raised
    ``NameError`` on the first row, or reused the previous row's city.

    NOTE(review): title and link live on a ``Job`` object shared across
    rows, so a row without a ``posTitle`` link still carries the previous
    row's values — confirm whether such rows can occur.

    Args:
        url: Address of the job listing page.

    Returns:
        The sum of the values returned by ``job_insert``.
    """
    soup = get_soup(url)
    jobs_list = soup.find_all("tr", {"class": "reqitem"})
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for job_entry in jobs_list:
        for title_cell in job_entry.find_all("td", {"class": "posTitle"}):
            for anchor in title_cell.find_all("a"):
                # Title is the whole cell's text; the link is the anchor's.
                job_class.title = title_cell.text
                job_class.info_link = "https://covca.hrmdirect.com/" + \
                    anchor.get('href')

        # None means "this row has no cities cell" and never matches.
        job_location = None
        for city_cell in job_entry.find_all("td", {"class": "cities"}):
            job_location = city_cell.text

        if job_location == "Los Angeles":
            insert_count += job_insert(job_class)
    return insert_count