def get_single_role(div, summary):
    """
    Scrape details of a single role

    The company is reported under NAME and the role itself is wrapped in a
    one-element ENTRIES list.

    Args:
        div: the div element; only the description is looked up in it
        summary: the summary section; title, company, dates and location
            are looked up in it
    Returns:
        A dict of the details for a single role
    """
    # Title and company are required: find_element_by_* is called directly on
    # the summary, without any fallback in this function.
    title_element = summary.find_element_by_css_selector('.t-16.t-black.t-bold')
    role_title = title_element.text
    company_element = summary.find_element_by_class_name('pv-entity__secondary-title')
    employer = company_element.text

    period = get_span_text(
        summary, '.pv-entity__date-range.t-14.t-black--light.t-normal')
    place = get_optional_text(
        summary, '.pv-entity__location.t-14.t-black--light.t-normal.block')
    details = get_description(
        div, '.pv-entity__description.t-14.t-black.t-normal.ember-view')

    entry = {
        TITLE: role_title,
        DATES: period,
        LOCATION: place,
        DESCRIPTION: details,
    }
    return {NAME: employer, ENTRIES: [entry]}
def get_projects(ul):
    """
    Scrape projects details

    Args:
        ul: the ul element; every li found under it is treated as one project
    Returns:
        A list of details of all projects, one dict per li holding the
        NAME, DATES, DESCRIPTION and LINK keys
    """
    # NOTE(review): get_projects is defined a second time further down this
    # file; confirm which definition is meant to be kept.

    # Label text that is removed from the start of the scraped description.
    label = 'Project description'

    projects = []
    for li in ul.find_elements_by_tag_name('li'):
        name = li.find_element_by_class_name(
            'pv-accomplishment-entity__title').text.replace(
                'Project name', '').strip()
        dates = get_optional_text(
            li,
            '.pv-accomplishment-entity__date.pv-accomplishment-entity__subtitle',
            is_span=False)
        description = get_description(
            li,
            '.pv-accomplishment-entity__description.t-14.t-black--light.t-normal')

        # str.lstrip() treats its argument as a *set of characters*, not as a
        # prefix, so lstrip('Project description\n') also removed the leading
        # letters of the real description whenever they happened to be in
        # that set. Drop the label only when it is an actual prefix.
        if description.startswith(label + '\n'):
            description = description[len(label) + 1:]
        elif description == label:
            # Label present but nothing after it.
            description = ''

        link = get_accomplishment_link(li)
        projects.append({
            NAME: name,
            DATES: dates,
            DESCRIPTION: description,
            LINK: link
        })
    return projects
def get_volunteering(section):
    """
    Scrape volunteering details

    Entries are grouped by organisation, in the order each organisation is
    first encountered. LOCATION is always an empty string for these entries.

    Args:
        section: the volunteering section
    Returns:
        A list of details of all volunteering, one dict per organisation
        with its NAME and the list of its ENTRIES
    """
    items = get_section(section)
    by_organisation = defaultdict(list)

    for item in items.find_elements_by_tag_name('li'):
        role = item.find_element_by_css_selector('.t-16.t-black.t-bold').text
        org = get_span_text(item, '.t-14.t-black.t-normal')
        period = get_optional_text(
            item,
            '.pv-entity__date-range.detail-facet.inline-block.t-14.t-black--light.t-normal')
        details = get_description(
            item, '.pv-entity__description.t-14.t-black--light.t-normal.mt4')

        entry = {
            TITLE: role,
            DATES: period,
            LOCATION: '',
            DESCRIPTION: details,
        }
        by_organisation[org].append(entry)

    return [
        {NAME: org, ENTRIES: entries}
        for org, entries in by_organisation.items()
    ]
def get_projects(ul):
    """
    Scrape projects details

    Args:
        ul: the ul element; every li found under it is treated as one project
    Returns:
        A list of details of all projects, one dict per li holding the
        NAME, DATES, DESCRIPTION and LINK keys
    """
    # Label text that is removed from the start of the scraped description.
    label = "Project description"

    projects = []
    for li in ul.find_elements_by_tag_name("li"):
        name = (li.find_element_by_class_name(
            "pv-accomplishment-entity__title").text.replace(
                "Project name", "").strip())
        dates = get_optional_text(
            li,
            ".pv-accomplishment-entity__date.pv-accomplishment-entity__subtitle",
            is_span=False,
        )
        description = get_description(
            li, ".pv-accomplishment-entity__description.t-14")

        # str.lstrip() treats its argument as a *set of characters*, not as a
        # prefix, so lstrip("Project description\n") also removed the leading
        # letters of the real description whenever they happened to be in
        # that set. Drop the label only when it is an actual prefix.
        if description.startswith(label + "\n"):
            description = description[len(label) + 1:]
        elif description == label:
            # Label present but nothing after it.
            description = ""

        link = get_accomplishment_link(li)
        projects.append({
            NAME: name,
            DATES: dates,
            DESCRIPTION: description,
            LINK: link
        })
    return projects
def get_multiple_roles(div, summary):
    """
    Scrape details of multiple roles

    Args:
        div: the div element; the "see more" toggle and the individual role
            items are looked up in it
        summary: the summary section; the company name is read from it
    Returns:
        A dict of the details for multiple roles: the company under NAME and
        one dict per role under ENTRIES
    """
    # NOTE(review): get_multiple_roles is defined a second time further down
    # this file; confirm which definition is meant to be kept.
    company = get_span_text(summary, '.t-16.t-black.t-bold')

    # Show all roles. The toggle is optional: when it is absent the lookup
    # raises NoSuchElementException and there is nothing to expand.
    try:
        div.find_element_by_css_selector(
            '.pv-profile-section__see-more-inline.pv-profile-section__text-truncate-toggle.link'
        ).click()
        # Fixed pause after the click before the role items are queried.
        time.sleep(1)
    except NoSuchElementException:
        pass

    # find_elements_* returns an empty list when nothing matches; it does not
    # raise NoSuchElementException. The previous try/except therefore never
    # reached the fallback class name. Fall back on an empty result instead.
    role_sections = (
        div.find_elements_by_class_name('pv-entity__position-group-role-item')
        or div.find_elements_by_class_name(
            'pv-entity__position-group-role-item-fading-timeline'))

    roles = []
    for role_section in role_sections:
        title = get_span_text(role_section, '.t-14.t-black.t-bold')
        dates = get_span_text(
            role_section, '.pv-entity__date-range.t-14.t-black.t-normal')
        location = get_optional_text(
            role_section,
            '.pv-entity__location.t-14.t-black--light.t-normal.block')
        description = get_description(
            role_section,
            '.pv-entity__description.t-14.t-black.t-normal.ember-view')
        roles.append({
            TITLE: title,
            DATES: dates,
            LOCATION: location,
            DESCRIPTION: description
        })

    return {NAME: company, ENTRIES: roles}
def get_education(section):
    """
    Scrape education details

    Entries are grouped by school, in the order each school is first
    encountered. LOCATION is always an empty string for these entries.

    Args:
        section: the education section
    Returns:
        A list of details of all educations, one dict per school with its
        NAME and the list of its ENTRIES
    """
    # NOTE(review): get_education is defined a second time further down this
    # file; confirm which definition is meant to be kept.

    # The ul element can appear in two different classes: try the variant
    # carrying the extra ember-view class first, then the one without it.
    try:
        listing = section.find_element_by_css_selector(
            '.pv-profile-section__section-info.section-info.pv-profile-section__section-info--has-no-more.ember-view'
        )
    except NoSuchElementException:
        listing = section.find_element_by_css_selector(
            '.pv-profile-section__section-info.section-info.pv-profile-section__section-info--has-no-more'
        )

    by_school = defaultdict(list)
    for item in listing.find_elements_by_tag_name('li'):
        school_element = item.find_element_by_css_selector(
            '.pv-entity__school-name.t-16.t-black.t-bold')
        school_name = school_element.text
        degree_text = get_span_text(
            item,
            '.pv-entity__secondary-title.pv-entity__degree-name.pv-entity__secondary-title.t-14.t-black.t-normal'
        )
        period = get_optional_text(
            item, '.pv-entity__dates.t-14.t-black--light.t-normal')
        details = get_description(
            item, '.pv-entity__description.t-14.t-black--light.t-normal.mt4')

        entry = {
            DEGREE: get_degree(item, degree_text),
            LOCATION: '',
            DATES: period,
            DESCRIPTION: details,
        }
        by_school[school_name].append(entry)

    return [
        {NAME: school_name, ENTRIES: entries}
        for school_name, entries in by_school.items()
    ]
def get_multiple_roles(div, summary):
    """
    Scrape details of multiple roles

    Args:
        div: the div element; the individual role items are looked up in it
        summary: the summary section; the company name is read from it
    Returns:
        A dict of the details for multiple roles: the company under NAME and
        one dict per role under ENTRIES
    """
    # find_elements_* returns an empty list when nothing matches; it does not
    # raise NoSuchElementException. The previous try/except therefore never
    # reached the fallback class name. Fall back on an empty result instead.
    role_sections = (
        div.find_elements_by_class_name("pv-entity__position-group-role-item")
        or div.find_elements_by_class_name(
            "pv-entity__position-group-role-item-fading-timeline"))

    roles = []
    for role_section in role_sections:
        title = get_span_text(role_section, ".t-14.t-black.t-bold")
        dates = get_span_text(
            role_section, ".pv-entity__date-range.t-14.t-black.t-normal")
        location = get_optional_text(
            role_section,
            ".pv-entity__location.t-14.t-black--light.t-normal.block")
        description = get_description(
            role_section,
            ".pv-entity__description.t-14.t-black.t-normal.ember-view")
        roles.append({
            TITLE: title,
            DATES: dates,
            LOCATION: location,
            DESCRIPTION: description
        })

    # The company name is read from the summary after the roles are collected.
    results = {
        NAME: get_span_text(summary, ".t-16.t-black.t-bold"),
        ENTRIES: roles
    }
    return results
def get_education(section):
    """
    Scrape education details

    Entries are grouped by school, in the order each school is first
    encountered. An li without a degree name is left out of the result.
    LOCATION is always an empty string for these entries.

    Args:
        section: the education section
    Returns:
        A list of details of all educations, one dict per school with its
        NAME and the list of its ENTRIES
    """
    listing = get_section(section)
    by_school = defaultdict(list)

    for item in listing.find_elements_by_tag_name('li'):
        school_element = item.find_element_by_css_selector(
            '.pv-entity__school-name.t-16.t-black.t-bold')
        school_name = school_element.text
        degree_text = get_span_text(
            item,
            '.pv-entity__secondary-title.pv-entity__degree-name.pv-entity__secondary-title.t-14.t-black.t-normal'
        )
        period = get_optional_text(
            item, '.pv-entity__dates.t-14.t-black--light.t-normal')
        details = get_description(
            item, '.pv-entity__description.t-14.t-black--light.t-normal.mt4')

        # Only entries that carry a degree name are recorded.
        if degree_text:
            entry = {
                DEGREE: get_degree(item, degree_text),
                LOCATION: '',
                DATES: period,
                DESCRIPTION: details,
            }
            by_school[school_name].append(entry)

    return [
        {NAME: school_name, ENTRIES: entries}
        for school_name, entries in by_school.items()
    ]