コード例 #1
0
ファイル: monster.py プロジェクト: Luckyz7/JobFunnel
 def get(self, parameter: JobField, soup: BeautifulSoup) -> Any:
     """Extract one job attribute from a job-listing soup by JobField.

     Handles KEY_ID, TITLE, COMPANY, LOCATION, POST_DATE and URL. No
     lookup is guarded against a missing element.
     NOTE: priority is all the same.

     Raises:
         NotImplementedError: if the JobField is not one handled here.
     """
     if parameter == JobField.KEY_ID:
         # TODO: is there a way to combine these calls?
         # NOTE: do not use 'data-m_impr_j_jobid' as this is duplicated
         heading = soup.find('h2', attrs={'class': 'title'})
         return heading.find('a').get('data-m_impr_j_postingid')

     if parameter == JobField.TITLE:
         heading = soup.find('h2', attrs={'class': 'title'})
         return heading.text.strip()

     if parameter == JobField.COMPANY:
         company_div = soup.find('div', attrs={'class': 'company'})
         return company_div.text.strip()

     if parameter == JobField.LOCATION:
         location_div = soup.find('div', attrs={'class': 'location'})
         return location_div.text.strip()

     if parameter == JobField.POST_DATE:
         # The <time> element's text is handed to the relative-date helper.
         relative_date = soup.find('time').text.strip()
         return calc_post_date_from_relative_str(relative_date)

     if parameter == JobField.URL:
         # NOTE: seems that it is a bit hard to view these links? getting 503
         # NOTE(review): the 'data-bypass' value looks masked -- confirm the
         # real attribute value against the live markup.
         bypass_anchor = soup.find('a', attrs={'data-bypass': '******'})
         return str(bypass_anchor.get('href'))

     raise NotImplementedError(f"Cannot get {parameter.name}")
コード例 #2
0
 def get(self, parameter: JobField, soup: BeautifulSoup) -> Any:
     """Extract one job attribute from a job-listing soup by JobField.

     TAGS, REMOTENESS and WAGE are optional on the page and fall back to
     ``[]``, ``Remoteness.UNKNOWN`` and ``''`` respectively when their
     element is absent. The remaining fields are looked up unguarded.

     Raises:
         NotImplementedError: if the JobField is not one handled here.
     """
     if parameter == JobField.TITLE:
         title_link = soup.find('a', attrs={'data-tn-element': 'jobTitle'})
         return title_link.text.strip()

     if parameter == JobField.COMPANY:
         company_span = soup.find('span', attrs={'class': 'company'})
         return company_span.text.strip()

     if parameter == JobField.LOCATION:
         location_span = soup.find('span', attrs={'class': 'location'})
         return location_span.text.strip()

     if parameter == JobField.TAGS:
         # tags may not be on page and that's ok.
         shelf = soup.find('table', attrs={'class': 'jobCardShelfContainer'})
         if not shelf:
             return []
         shelf_cells = shelf.find_all(
             'td', attrs={'class': 'jobCardShelfItem'}
         )
         return [cell.text.strip() for cell in shelf_cells]

     if parameter == JobField.REMOTENESS:
         remote_span = soup.find('span', attrs={'class': 'remote'})
         if not remote_span:
             return Remoteness.UNKNOWN
         # Map keys are compared against the lower-cased, stripped text.
         remote_key = remote_span.text.strip().lower()
         if remote_key not in REMOTENESS_STR_MAP:
             return Remoteness.UNKNOWN
         return REMOTENESS_STR_MAP[remote_key]

     if parameter == JobField.WAGE:
         # We may not be able to obtain a wage
         salary_span = soup.find('span', attrs={'class': 'salaryText'})
         return salary_span.text.strip() if salary_span else ''

     if parameter == JobField.POST_DATE:
         date_text = soup.find('span', attrs={'class': 'date'}).text.strip()
         return calc_post_date_from_relative_str(date_text)

     if parameter == JobField.KEY_ID:
         # The id is pulled out of the save-job link's serialized markup;
         # the first regex match is used.
         save_link = soup.find(
             'a', attrs={'class': 'sl resultLink save-job-link'}
         )
         return ID_REGEX.findall(str(save_link))[0]

     raise NotImplementedError(f"Cannot get {parameter.name}")
コード例 #3
0
ファイル: glassdoor.py プロジェクト: singhviveka/WebScrab
 def get(self, parameter: JobField, soup: BeautifulSoup) -> Any:
     """Get a single job attribute from a soup object by JobField

     TITLE, LOCATION and KEY_ID are read from attributes of ``soup``
     itself; COMPANY, POST_DATE, WAGE and URL are read from elements
     found inside it. Only WAGE is guarded against a missing element
     (it returns ``''``); the other element lookups are unguarded.

     TODO: impl div class=compactStars value somewhere.

     Args:
         parameter: the JobField to extract.
         soup: the soup element for one job listing.

     Returns:
         The extracted value. For URL this is the listing's relative href
         prefixed with ``https://www.glassdoor.<domain>``, where the domain
         comes from ``self.config.search_config.domain``.

     Raises:
         NotImplementedError: if the JobField is not one handled here.
     """
     if parameter == JobField.TITLE:
         # TODO: we should instead get what user sees in the <span>
         return soup.get('data-normalize-job-title')
     elif parameter == JobField.COMPANY:
         # attrs must be a dict ({'class': ...}); a set literal here would
         # be treated as a list of alternative class names to match.
         # NOTE: the 'jobEmpolyerName' spelling is part of the selector and
         # is kept as-is.
         return soup.find(
             'div', attrs={'class': 'jobInfoItem jobEmpolyerName'}
         ).text.strip()
     elif parameter == JobField.LOCATION:
         return soup.get('data-job-loc')
     # FIXME: impl.
     # elif parameter == JobField.TAGS:
     #     labels = soup.find_all('div', attrs={'class': 'jobLabel'})
     #     if labels:
     #         return [
     #             l.text.strip() for l in labels if l.text.strip() != 'New'
     #         ]
     #     else:
     #         return []
     # FIXME: impl JobField.REMOTE
     elif parameter == JobField.POST_DATE:
         return calc_post_date_from_relative_str(
             soup.find(
                 'div', attrs={
                     'class': 'd-flex align-items-end pl-std css-mi55ob'
                 }
             ).text.strip()
         )
     elif parameter == JobField.WAGE:
         # NOTE: most jobs don't have this so we wont raise a warning here
         # and will fail silently instead
         wage = soup.find('span', attrs={'class': 'gray salary'})
         if wage is not None:
             return wage.text.strip()
         else:
             return ''
     elif parameter == JobField.KEY_ID:
         return soup.get('data-id')
     elif parameter == JobField.URL:
         # Same dict-vs-set correction as COMPANY: match class 'logoWrap'
         # only, not any element whose class happens to be 'class'.
         part_url = soup.find(
             'div', attrs={'class': 'logoWrap'}
         ).find('a').get('href')
         return (
             f'https://www.glassdoor.{self.config.search_config.domain}'
             f'{part_url}'
         )
     else:
         raise NotImplementedError(f"Cannot get {parameter.name}")