def run(url):
    """Scrape job links under div.wpb_wrapper > p and push each into the shared globals."""
    page = get_javascript_soup(url)
    for anchor in page.select('div.wpb_wrapper > p > a'):
        globals.job_title = anchor.text.strip()
        globals.info_link = anchor['href']
        update_db(organization)
def run(url):
    """Scrape Paylocity job listings, resolving location and zip from each detail page.

    Returns the number of rows inserted via job_insert.
    """
    page = get_javascript_soup(url)
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    inserted = 0
    for listing in page.find_all('div', {'class': 'job-listing-job-item'}):
        title_anchor = listing.find('span', {'class': 'job-item-title'}).a
        job_class.title = title_anchor.text.strip()
        job_class.info_link = 'https://recruiting.paylocity.com' + title_anchor['href']
        details = get_soup(job_class.info_link)
        location = details.find('div', {'class': 'preview-location'})
        if location.a:
            job_class.location = location.a.text
            zipcode = location.a['href'].split('+')[-1]
            try:
                job_class.zip_code = int(zipcode)
            except ValueError:
                # generate a zip code if one is not available
                job_class.zip_code = city_to_zip(job_class.location)
        else:
            job_class.location = ''
            job_class.zip_code = ''
        date_text = listing.find('div', {'class': 'job-title-column'}).find_all('span')[1].text
        job_class.post_date = string_to_date(date_text.split(' - ')[0])
        inserted += job_insert(job_class)
    return inserted
def run(url):
    """Scrape the srJobList table, skipping jobs whose state is not CA.

    Returns the number of rows inserted via job_insert.
    """
    page = get_javascript_soup(url)
    rows = page.find('table', {'class': 'srJobList'}).tbody.find_all('tr')[1:]
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    inserted = 0
    for row in rows:
        job_class.title = row.find('td', {'class': 'srJobListJobTitle'}).text.strip()
        # The detail URL is embedded in the row's onclick handler text.
        onclick = row['onclick']
        job_class.info_link = onclick[13:len(onclick) - 3]
        job_class.full_or_part = row.find('td', {'class': 'srJobListTypeOfEmployment'}).text
        job_class.location = row.find('td', {'class': 'srJobListLocation'}).text
        parts = job_class.location.split(',')
        if len(parts) > 1 and parts[-1] and parts[-1].strip().lower() != 'ca':
            # skip job if state is not CA
            print('Skip location: %s' % job_class.location)
            continue
        job_class.zip_code = city_to_zip(parts[0])
        inserted += job_insert(job_class)
    return inserted
def run(url):
    """Scrape job links under the entry-content columns and push each into the shared globals."""
    page = get_javascript_soup(url)
    for anchor in page.select('div.entry-content div.small-12.columns > p > a'):
        globals.job_title = anchor.text
        globals.info_link = anchor['href']
        update_db(organization)
def run(url):
    """Insert one Job per anchor found under div.wpb_wrapper > p.

    Returns the number of rows inserted via job_insert.
    """
    page = get_javascript_soup(url)
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    inserted = 0
    for anchor in page.select('div.wpb_wrapper > p > a'):
        job_class.title = anchor.text.strip()
        job_class.info_link = anchor['href']
        inserted += job_insert(job_class)
    return inserted
def run(url):
    """Insert one Job per anchor found under the entry-content columns.

    Returns the number of rows inserted via job_insert.
    """
    page = get_javascript_soup(url)
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    inserted = 0
    for anchor in page.select('div.entry-content div.small-12.columns > p > a'):
        job_class.title = anchor.text
        job_class.info_link = anchor['href']
        inserted += job_insert(job_class)
    return inserted
def run(url):
    """Scrape the srJobList table and push each row through the shared globals."""
    page = get_javascript_soup(url)
    rows = page.find('table', {'class': 'srJobList'}).tbody.find_all('tr')[1:]
    for row in rows:
        globals.job_title = row.find('td', {'class': 'srJobListJobTitle'}).text.strip()
        # The detail URL is embedded in the row's onclick handler text.
        onclick = row['onclick']
        globals.info_link = onclick[13:len(onclick) - 3]
        globals.full_or_part = row.find('td', {'class': 'srJobListTypeOfEmployment'}).text
        globals.job_location = clean_location(
            row.find('td', {'class': 'srJobListLocation'}).text)
        globals.job_zip_code = city_to_zip(globals.job_location)
        update_db(organization)
def run(url):
    """Insert one Job per <li> in the element following the "Job Opportunities" heading.

    Returns the number of rows inserted via job_insert.
    """
    page = get_javascript_soup(url)
    # NOTE(review): next_sibling assumes the list directly follows the <h3>
    # with no whitespace text node in between — confirm against the live page.
    container = page.find('h3', text='Job Opportunities').next_sibling
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    inserted = 0
    for item in container.find_all('li'):
        job_class.title = item.text
        job_class.info_link = item.a['href']
        inserted += job_insert(job_class)
    return inserted
def run(url):
    """Scrape Indeed-hosted openings, visiting each detail page for full info.

    For every element tagged data-tn-element="jobLink[]" this reads title,
    summary, location, post date, employment type and (optionally) salary from
    the job's detail page, skips jobs whose location names a state other than
    CA, and inserts the rest. Returns the number of rows inserted.
    """
    soup = get_javascript_soup(url)
    current_openings = soup.findAll(attrs={"data-tn-element": "jobLink[]"})
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for current_opening in current_openings:
        detail_page_link = current_opening.find('a')['href']
        detail_page_soup = get_soup(detail_page_link)
        detail_page_desc = detail_page_soup.find(
            'div', {"data-tn-component": "jobDescription"})
        job_class.title = detail_page_desc.find('h1').text.strip()
        job_summary_parts = detail_page_desc.findAll(['p', 'li'])
        # Everything between the heading block and the trailing "posted ago"
        # line is treated as the summary.
        job_class.summary = ' '.join(
            map(lambda a: a.getText(), job_summary_parts[1:-1])).strip()
        job_class.location = detail_page_desc.find(
            'dt', string="Location").findNext().get_text()
        location_parts = job_class.location.split(',')
        if len(location_parts) > 1 and len(
                location_parts[-1]
        ) and location_parts[-1].strip().lower() != 'ca':
            # skip job if state is not CA
            print('Skip location: %s' % job_class.location)
            continue
        job_class.zip_code = city_to_zip(location_parts[0])
        # Last summary part reads like "Posted 30+ days ago"; pull the count.
        posted_ago = job_summary_parts[-1].get_text().split(' ')
        length = posted_ago[1]
        if length[-1:] == '+':
            # BUGFIX: strip the trailing '+' — previously length[:1], which
            # truncated multi-digit values such as "30+" down to "3".
            length = length[:-1]
        length = int(length)
        unit = posted_ago[2]
        job_class.post_date = date_ago(length, unit)
        job_class.full_or_part = detail_page_desc.find(
            'dt', string="Job Type").findNext().get_text()
        salary_search = detail_page_desc.find('dt', string="Salary")
        if salary_search is not None:
            job_class.salary = salary_search.findNext().get_text()
        job_class.info_link = detail_page_link
        insert_count += job_insert(job_class)
    return insert_count
def run(url):
    """Walk paginated article listings, following the "« Older Entries" link.

    Returns the number of rows inserted via job_insert.
    """
    page = get_javascript_soup(url)
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    inserted = 0
    while True:
        for post in page.find_all('article', {'class': 'et_pb_post'}):
            heading = post.find('h2', {'class': 'entry-title'})
            job_class.title = heading.text
            job_class.info_link = heading.a['href']
            job_class.summary = post.find('div', {'class': 'post-content'}).p.text
            inserted += job_insert(job_class)
        # Check if more job entries on website to scrape
        older_link = page.find(text="« Older Entries")
        if not older_link:
            break
        page = get_javascript_soup(older_link.parent['href'])
    return inserted
def run(url):
    """Scrape Paylocity listings, then each detail page for zip, status, salary.

    Returns the number of rows inserted via job_insert.
    """
    soup = get_javascript_soup(url)
    job_listings = soup.find_all('div', {'class': 'job-listing-job-item'})
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0
    for job_listing in job_listings:
        job_description = job_listing.find_all('span')
        # Get job title and link (first span holds the anchor)
        job_class.title = job_description[0].a.text
        job_class.info_link = 'https://recruiting.paylocity.com' + \
            job_description[0].a['href']
        # Get date as string; clean up trailing " -" before splitting M/D/Y
        date = job_description[1].text
        if date[len(date) - 2] == '-':
            date = date[0:len(date) - 3]
        month, day, year = (int(part) for part in date.strip().split('/'))
        job_class.post_date = datetime(year, month, day)
        # Get Location
        job_class.location = job_listing.find('div', {
            'class': 'location-column'
        }).span.text
        # BUGFIX: job_class is reused across iterations, so reset the zip code
        # here; otherwise a listing without one silently keeps the previous
        # listing's value.
        job_class.zip_code = ''
        # Get soup of job listing to scrape more info
        listing_soup = get_soup(job_class.info_link)
        listing_body = listing_soup.find('body').find_all('p')
        # Retrieve Full/Part-time and Salary info if available
        if 'Location' in listing_body[0].text:
            location_string = listing_body[0].text.split(':')[1].lstrip()
            zip_code_result = re.search(r'(\d{5})', location_string)
            if zip_code_result is not None:
                job_class.zip_code = zip_code_result.group(1)
            # can't get city since there's no standard. It could be
            # "Hollywood", "Koreatown, Los angeles, California", or even
            # "Multiple Locations"
            if len(job_class.zip_code) == 0:
                job_class.zip_code = globals.city_to_zip(job_class.location)
        if 'Status' in listing_body[1].text:
            job_class.full_or_part = listing_body[1].text[8:]
        if 'Salary' in listing_body[2].text:
            job_class.salary = listing_body[2].text[14:]
        insert_count += job_insert(job_class)
    return insert_count
def run(url):
    """Scrape the careers_table rows into Job records.

    Returns the number of rows inserted via job_insert.
    """
    page = get_javascript_soup(url)
    rows = page.find('table', {'id': 'careers_table'}).tbody.find_all('tr')
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    inserted = 0
    for row in rows:
        cells = row.find_all('td')
        job_class.title = cells[0].a.text
        job_class.info_link = 'https://theapplicantmanager.com/' + cells[0].a['href']
        job_class.location = cells[1].text
        job_class.full_or_part = cells[3].text
        job_class.post_date = cells[4].text
        inserted += job_insert(job_class)
    return inserted
def run(url):
    """Scrape the cws-search-results table; employment type comes from each detail page.

    Returns the number of rows inserted via job_insert.
    """
    page = get_javascript_soup(url)
    rows = page.find('table', {'id': 'cws-search-results'}).find_all('tr')[1:]
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    inserted = 0
    for row in rows:
        cells = row.find_all('td')
        job_class.title = cells[1].a.text.strip()
        job_class.info_link = cells[1].a['href']
        job_class.location = clean_location(cells[2].text)
        job_class.zip_code = city_to_zip(job_class.location)
        detail = get_soup(job_class.info_link)
        job_class.full_or_part = detail.find(
            text="Employment Duration:").parent.parent.b.text.strip()
        inserted += job_insert(job_class)
    return inserted
def run(url):
    """Scrape accordion panes (both <div>- and <ul>-based) for job listings.

    The page uses both element types with the same ui-accordion-content class;
    each <li> inside a pane is one posting whose last <a> is the detail link.
    Returns the number of rows inserted via job_insert.
    """
    soup = get_javascript_soup(url)
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    insert_count = 0

    def _insert_from(pane):
        # One posting per <li>; dedupes the previously copy-pasted loop body.
        count = 0
        for job_listing in pane.find_all('li'):
            job_class.title = job_listing.text.strip()
            job_class.info_link = 'https://lalgbtcenter.org' + \
                job_listing.find_all('a')[-1]['href']
            count += job_insert(job_class)
        return count

    for pane in soup.find_all('div', {'class': 'ui-accordion-content'}):
        insert_count += _insert_from(pane)
    for pane in soup.find_all('ul', {'class': 'ui-accordion-content'}):
        insert_count += _insert_from(pane)
    return insert_count
def run(url):
    """Scrape Paycom-hosted jobInfo blocks into Job records.

    Returns the number of rows inserted via job_insert.
    """
    page = get_javascript_soup(url)
    job_class = Job(organization, "")
    job_class.organization_id = organization_id
    inserted = 0
    for listing in page.find_all('div', {'class': 'jobInfo'}):
        title_anchor = listing.find('span', {'class': 'jobTitle'}).a
        job_class.title = title_anchor.text.strip()
        job_class.info_link = 'https://www.paycomonline.net' + title_anchor['href']
        location_text = listing.find('span', {'class': 'jobLocation'}).text
        if location_text:
            # Location text looks like "<prefix> - <city>"; keep the city part.
            job_class.location = globals.clean_location(location_text.split(' - ')[1])
            job_class.zip_code = globals.city_to_zip(job_class.location)
        summary_text = listing.find('span', {'class': 'jobDescription'}).text
        if summary_text:
            job_class.summary = summary_text.strip()
        type_text = listing.find('span', {'class': 'jobType'}).text
        if type_text:
            lowered = str(type_text).lower()
            job_class.full_or_part = 'full' if ('ft' in lowered or 'full' in lowered) else 'part'
        inserted += job_insert(job_class)
    return inserted