def get(self, parameter: JobField, soup: BeautifulSoup) -> Any:
    """Extract one attribute of a job listing from its soup, chosen by JobField.

    NOTE: priority is all the same.

    Supported fields are KEY_ID, TITLE, COMPANY, LOCATION, POST_DATE and URL.
    None of the element lookups are guarded: each branch assumes the element
    it searches for is present in ``soup``.

    Args:
        parameter: the JobField identifying which attribute to read.
        soup: the parsed markup for a single job listing.

    Returns:
        The extracted value. Text fields are whitespace-stripped strings,
        POST_DATE is whatever calc_post_date_from_relative_str returns for
        the listing's <time> text, URL is the link's href passed through
        str().

    Raises:
        NotImplementedError: if ``parameter`` is not one of the fields above.
    """
    if parameter == JobField.KEY_ID:
        # TODO: is there a way to combine these calls?
        # NOTE: do not use 'data-m_impr_j_jobid' as this is duplicated
        heading = soup.find('h2', attrs={'class': 'title'})
        anchor = heading.find('a')
        return anchor.get('data-m_impr_j_postingid')

    if parameter == JobField.TITLE:
        heading = soup.find('h2', attrs={'class': 'title'})
        return heading.text.strip()

    if parameter == JobField.COMPANY:
        company_div = soup.find('div', attrs={'class': 'company'})
        return company_div.text.strip()

    if parameter == JobField.LOCATION:
        location_div = soup.find('div', attrs={'class': 'location'})
        return location_div.text.strip()

    if parameter == JobField.POST_DATE:
        # The <time> element holds a relative date string; the helper
        # converts it.
        return calc_post_date_from_relative_str(
            soup.find('time').text.strip()
        )

    if parameter == JobField.URL:
        # NOTE: seems that it is a bit hard to view these links? getting 503
        bypass_link = soup.find('a', attrs={'data-bypass': '******'})
        return str(bypass_link.get('href'))

    raise NotImplementedError(f"Cannot get {parameter.name}")
def get(self, parameter: JobField, soup: BeautifulSoup) -> Any:
    """Extract one attribute of a job listing from its soup, chosen by JobField.

    Supported fields are TITLE, COMPANY, LOCATION, TAGS, REMOTENESS, WAGE,
    POST_DATE and KEY_ID. TAGS, REMOTENESS and WAGE tolerate a missing
    element and fall back to ``[]``, ``Remoteness.UNKNOWN`` and ``''``
    respectively; the remaining branches assume their element is present.

    Args:
        parameter: the JobField identifying which attribute to read.
        soup: the parsed markup for a single job listing.

    Returns:
        The extracted value for the requested field.

    Raises:
        NotImplementedError: if ``parameter`` is not one of the fields above.
    """
    if parameter == JobField.TITLE:
        title_link = soup.find('a', attrs={'data-tn-element': 'jobTitle'})
        return title_link.text.strip()

    if parameter == JobField.COMPANY:
        company_span = soup.find('span', attrs={'class': 'company'})
        return company_span.text.strip()

    if parameter == JobField.LOCATION:
        location_span = soup.find('span', attrs={'class': 'location'})
        return location_span.text.strip()

    if parameter == JobField.TAGS:
        # tags may not be on page and that's ok.
        shelf = soup.find('table', attrs={'class': 'jobCardShelfContainer'})
        if not shelf:
            return []
        shelf_items = shelf.find_all(
            'td', attrs={'class': 'jobCardShelfItem'}
        )
        return [item.text.strip() for item in shelf_items]

    if parameter == JobField.REMOTENESS:
        remote_span = soup.find('span', attrs={'class': 'remote'})
        if not remote_span:
            return Remoteness.UNKNOWN
        # Lower-cased label text is the lookup key into the map.
        label = remote_span.text.strip().lower()
        if label not in REMOTENESS_STR_MAP:
            return Remoteness.UNKNOWN
        return REMOTENESS_STR_MAP[label]

    if parameter == JobField.WAGE:
        # We may not be able to obtain a wage
        salary_span = soup.find('span', attrs={'class': 'salaryText'})
        return salary_span.text.strip() if salary_span else ''

    if parameter == JobField.POST_DATE:
        date_span = soup.find('span', attrs={'class': 'date'})
        return calc_post_date_from_relative_str(date_span.text.strip())

    if parameter == JobField.KEY_ID:
        # The id is pulled out of the save-job link's rendered markup via
        # ID_REGEX; the first match is used.
        save_link = soup.find(
            'a', attrs={'class': 'sl resultLink save-job-link'}
        )
        return ID_REGEX.findall(str(save_link))[0]

    raise NotImplementedError(f"Cannot get {parameter.name}")
def get(self, parameter: JobField, soup: BeautifulSoup) -> Any:
    """Get a single job attribute from a soup object by JobField

    TODO: impl div class=compactStars value somewhere.

    Supported fields are TITLE, COMPANY, LOCATION, POST_DATE, WAGE, KEY_ID
    and URL. TITLE, LOCATION and KEY_ID are read from attributes on ``soup``
    itself; the other fields are read from elements found beneath it.

    Args:
        parameter: the JobField identifying which attribute to read.
        soup: the parsed markup element for a single job listing.

    Returns:
        The extracted value for the requested field. WAGE falls back to
        ``''`` when no salary element is present. URL is the listing's
        relative href prefixed with
        ``https://www.glassdoor.<self.config.search_config.domain>``.

    Raises:
        NotImplementedError: if ``parameter`` is not one of the fields above.
    """
    if parameter == JobField.TITLE:
        # TODO: we should instead get what user sees in the <span>
        return soup.get('data-normalize-job-title')
    elif parameter == JobField.COMPANY:
        # attrs must map attribute name -> value. A set literal here would
        # be read as a list of alternative class values and would also match
        # elements whose class is literally "class".
        # NOTE(review): the 'jobEmpolyerName' spelling presumably mirrors the
        # site's markup -- confirm before "correcting" it.
        return soup.find(
            'div', attrs={'class': 'jobInfoItem jobEmpolyerName'}
        ).text.strip()
    elif parameter == JobField.LOCATION:
        return soup.get('data-job-loc')
    # FIXME: impl.
    # elif parameter == JobField.TAGS:
    #     labels = soup.find_all('div', attrs={'class': 'jobLabel'})
    #     if labels:
    #         return [
    #             l.text.strip() for l in labels if l.text.strip() != 'New'
    #         ]
    #     else:
    #         return []
    # FIXME: impl JobField.REMOTE
    elif parameter == JobField.POST_DATE:
        return calc_post_date_from_relative_str(
            soup.find(
                'div',
                attrs={'class': 'd-flex align-items-end pl-std css-mi55ob'},
            ).text.strip()
        )
    elif parameter == JobField.WAGE:
        # NOTE: most jobs don't have this so we wont raise a warning here
        # and will fail silently instead
        wage = soup.find('span', attrs={'class': 'gray salary'})
        if wage is not None:
            return wage.text.strip()
        return ''
    elif parameter == JobField.KEY_ID:
        return soup.get('data-id')
    elif parameter == JobField.URL:
        # Same attrs fix as COMPANY: a dict, not a set literal.
        part_url = soup.find(
            'div', attrs={'class': 'logoWrap'}
        ).find('a').get('href')
        return (
            f'https://www.glassdoor.{self.config.search_config.domain}'
            f'{part_url}'
        )
    else:
        raise NotImplementedError(f"Cannot get {parameter.name}")