Esempio n. 1
0
    def scrape_multi_position_job(self, job: Job, job_element: WebElement):
        summary_element = self.find_summary_element(
            job_element, 'pv-entity__company-summary-info')

        #     <h3 class="t-16 t-black t-bold">
        #       <span class="visually-hidden">Company Name</span>
        #       <span>University of Colorado Boulder</span>
        #     </h3>
        # company_element = summary_element.find_element_by_class_name('t-16')
        company_element = summary_element.find_element_by_tag_name('h3')
        company_spans = company_element.find_elements_by_tag_name('span')
        job.company.name = self.canonize_company_name(company_spans[1].text)

        duration_element = summary_element.find_element_by_tag_name('h4')
        duration_spans = duration_element.find_elements_by_tag_name('span')
        job.total_duration = duration_spans[1].text.strip()

        # <ul class="pv-entity__position-group mt2">
        positions_element = job_element.find_element_by_class_name(
            'pv-entity__position-group')
        #     <li class="pv-entity__position-group-role-item">
        positions_items = positions_element.find_elements_by_class_name(
            'pv-entity__position-group-role-item')

        for position_item in positions_items:
            position = Position()

            # <div class="pv-entity__summary-info-v2 pv-entity__summary-info--background-section pv-entity__summary-info-margin-top mb2">
            position_element = position_item.find_element_by_class_name(
                'pv-entity__summary-info--background-section')

            # <h3 class="t-14"> <span>Title</span> <span>Web Designer</span>
            title_element = position_element.find_element_by_tag_name('h3')
            title_spans = title_element.find_elements_by_tag_name('span')
            position.title = title_spans[1].text.strip()

            position.date_range = self.scrape_date_range(position_element)
            position.location = self.scrape_location(position_element)
            position.duration = self.scrape_duration(position_element)

            job.add_position(position)