def run(url):
    """Scrape job listings from *url* and persist each via update_db.

    Legacy variant: results are communicated through the module-level
    ``globals`` object rather than a Job instance.  Every <h4> on the
    index page is treated as one job entry; its detail page is fetched
    to extract the job type and post date.
    """
    soup = get_soup(url)
    for job_entry in soup.find_all('h4'):
        globals.job_title = job_entry.a.text
        globals.info_link = job_entry.a['href']
        # Summary is just the detail-page link for this source.
        globals.job_summary = globals.info_link
        listing_soup = get_soup(globals.info_link)
        # NOTE(review): if neither marker paragraph is present,
        # full_or_part keeps its value from the previous iteration —
        # confirm that carry-over is intended.
        if listing_soup.body.find_all('p', string="Job Type: Full-time"):
            globals.full_or_part = 'Full-time'
        elif listing_soup.body.find_all('p', string="Job Type: Part-time"):
            globals.full_or_part = 'Part-time'
        # Subtitle text ends in "... <Month> <day>, <year>"; words 2-4
        # carry the date.  The day token has a trailing comma, dropped
        # with [:-1].
        date_text = listing_soup.body.find(
            'span', {'class': 'subtitle'}).text.split()
        month = month_to_num(date_text[2])
        day = int(date_text[3][:-1])
        year = int(date_text[4])
        globals.job_post_date = datetime(year, month, day)
        update_db(organization)
def run(url):
    """Scrape the wpjb job board at *url* and insert each listing.

    Returns the number of rows inserted via job_insert.
    """
    soup = get_soup(url)
    job_grid = soup.find('div', {'class': 'wpjb-job-list'})
    # NOTE(review): a single Job instance is mutated and re-inserted for
    # every listing; safe only if job_insert copies its fields — confirm.
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    # Capture "now" once so the year inference below stays consistent
    # even if the scrape crosses a month/year boundary mid-run.
    now = datetime.now()
    for job_div in job_grid.find_all('div', {'class': 'wpjb-col-main'}):
        major_line = job_div.find('div', {'class': 'wpjb-line-major'})
        job_class.title = major_line.a.text
        job_class.info_link = major_line.a['href']
        job_class.full_or_part = major_line.find(
            'span', {'class': 'wpjb-sub-title'}).text.strip()
        minor_line = job_div.find('div', {'class': 'wpjb-line-minor'})
        job_class.location = minor_line.find(
            'span', {'class': 'wpjb-job_location'}).text.strip()
        # Post date is rendered as "<Month>, <day>" with no year; assume
        # the listing was posted within the past twelve months.
        date = minor_line.find(
            'span', {'class': 'wpjb-job_created_at'}).text.strip().split(', ')
        month = month_to_num(date[0])
        day = int(date[1])
        year = now.year if month <= now.month else now.year - 1
        job_class.post_date = datetime(year, month, day)
        insert_count += job_insert(job_class)
    return insert_count
def run(url):
    """Scrape job listings from *url* and insert each via job_insert.

    Every <h4> on the index page is one job entry; its detail page is
    fetched to extract the job type and post date.  Returns the number
    of rows inserted.
    """
    soup = get_soup(url)
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for job_entry in soup.find_all('h4'):
        job_class.title = job_entry.a.text
        job_class.info_link = job_entry.a['href']
        listing_soup = get_soup(job_class.info_link)
        # NOTE(review): if neither marker paragraph is present,
        # full_or_part keeps its value from the previous iteration —
        # confirm that carry-over is intended.
        if listing_soup.body.find_all('p', string="Job Type: Full-time"):
            job_class.full_or_part = 'Full-time'
        elif listing_soup.body.find_all('p', string="Job Type: Part-time"):
            job_class.full_or_part = 'Part-time'
        # Subtitle text ends in "... <Month> <day>, <year>"; words 2-4
        # carry the date.  The day token has a trailing comma, dropped
        # with [:-1].
        date_text = listing_soup.body.find(
            'span', {'class': 'subtitle'}).text.split()
        month = month_to_num(date_text[2])
        day = int(date_text[3][:-1])
        year = int(date_text[4])
        job_class.post_date = datetime(year, month, day)
        insert_count += job_insert(job_class)
    return insert_count
def run(url):
    """Scrape the employment-opportunities <dl> at *url* and insert jobs.

    Each <dt> holds one listing whose <h3> heading reads
    "<title> Posted <Month> <day>, <year>".  Returns the number of rows
    inserted via job_insert.
    """
    soup = get_soup(url)
    jobs_container = soup.find('dl', {'class': 'employment-opportunities'})
    job_class = Job(organization, "")
    # No per-listing detail pages on this site; link every job to the
    # index page itself.
    job_class.info_link = url
    job_class.organization_id = organization_id
    insert_count = 0
    for job_listing in jobs_container.find_all('dt'):
        job_heading = job_listing.h3.text.split(' Posted ')
        job_class.title = job_heading[0]
        job_class.summary = job_listing.p.text
        date = job_heading[1].split(' ')
        month = month_to_num(date[0])
        day = int(date[1][:-1])  # drop the trailing comma after the day
        year = int(date[2])
        job_class.post_date = datetime(year, month, day)
        insert_count += job_insert(job_class)
    return insert_count
info_link = html_element.a['href'] job_summary = info_link listing_soup = get_soup(info_link) if listing_soup.body.find_all('p', string="Job Type: Full-time"): full_or_part = 'Full-time' elif listing_soup.body.find_all('p', string="Job Type: Part-time"): full_or_part = 'Part-time' date_text = listing_soup.body.find_all( 'span', {'class': 'subtitle'})[0].text.split() month_string = date_text[2] day = int(date_text[3][0:len(date_text[3]) - 1]) year = int(date_text[4]) month = month_to_num(month_string) job_post_date = datetime(year, month, day) update_db(organization) reset_vars() reset_vars() # Antelope Valley Domestic Violence Council (Valley Oasis) organization = "Antelope Valley Domestic Violence Council (Valley Oasis)" soup = get_soup("http://www.valleyoasis.org/job-opportunities.html") for html_element in soup.find("div", { "itemtype": "http://schema.org/WebPage"