Beispiel #1
0
def get_single_role(div, summary):
    """
    Build the record for a position that consists of a single role
    Args:
        div: the element searched for the role description
        summary: the element searched for title, company, dates and location

    Returns:
        A dict mapping NAME to the company text and ENTRIES to a one-element
        list holding the role's TITLE, DATES, LOCATION and DESCRIPTION
    """
    role_title = summary.find_element_by_css_selector('.t-16.t-black.t-bold').text
    employer = summary.find_element_by_class_name('pv-entity__secondary-title').text
    period = get_span_text(summary, '.pv-entity__date-range.t-14.t-black--light.t-normal')
    place = get_optional_text(summary, '.pv-entity__location.t-14.t-black--light.t-normal.block')
    # The description is looked up on the outer div, not on the summary.
    details = get_description(div, '.pv-entity__description.t-14.t-black.t-normal.ember-view')

    role = {
        TITLE: role_title,
        DATES: period,
        LOCATION: place,
        DESCRIPTION: details,
    }

    # Same shape as the multi-role result: one company, a list of roles.
    return {NAME: employer, ENTRIES: [role]}
Beispiel #2
0
def get_projects(ul):
    """
    Scrape projects details
    Args:
        ul: the ul element whose li children each describe one project

    Returns:
        A list with one dict per project, holding its NAME, DATES,
        DESCRIPTION and LINK
    """
    description_label = 'Project description\n'

    projects = []
    for li in ul.find_elements_by_tag_name('li'):
        # The title text contains the 'Project name' label; drop it.
        name = li.find_element_by_class_name('pv-accomplishment-entity__title').text.replace('Project name', '').strip()
        dates = get_optional_text(li, '.pv-accomplishment-entity__date.pv-accomplishment-entity__subtitle',
                                  is_span=False)
        description = get_description(li, '.pv-accomplishment-entity__description.t-14.t-black--light.t-normal')
        # Remove the label as a *prefix*. str.lstrip(chars) treats its
        # argument as a set of characters, so it would also eat leading
        # letters of the real description (e.g. "created ..." -> "ated ...").
        if description.startswith(description_label):
            description = description[len(description_label):]
        # Drop any whitespace left in front of the actual text.
        description = description.lstrip()
        link = get_accomplishment_link(li)

        projects.append({
            NAME: name,
            DATES: dates,
            DESCRIPTION: description,
            LINK: link
        })

    return projects
Beispiel #3
0
def get_volunteering(section):
    """
    Scrape volunteering details, grouped by organisation
    Args:
        section: the volunteering section

    Returns:
        A list with one dict per organisation, each holding the organisation
        NAME and the ENTRIES (roles) scraped for it
    """
    entries_by_org = defaultdict(list)

    for item in get_section(section).find_elements_by_tag_name('li'):
        role = item.find_element_by_css_selector('.t-16.t-black.t-bold').text
        org = get_span_text(item, '.t-14.t-black.t-normal')
        period = get_optional_text(
            item, '.pv-entity__date-range.detail-facet.inline-block.t-14.t-black--light.t-normal')
        details = get_description(item, '.pv-entity__description.t-14.t-black--light.t-normal.mt4')

        # No location is scraped for volunteering, so LOCATION is always ''.
        entry = {
            TITLE: role,
            DATES: period,
            LOCATION: '',
            DESCRIPTION: details,
        }
        entries_by_org[org].append(entry)

    # Flatten the grouping into a list, in first-seen organisation order.
    return [
        {NAME: org, ENTRIES: entries}
        for org, entries in entries_by_org.items()
    ]
def get_projects(ul):
    """
    Scrape projects details
    Args:
        ul: the ul element whose li children each describe one project

    Returns:
        A list with one dict per project, holding its NAME, DATES,
        DESCRIPTION and LINK
    """
    description_label = "Project description\n"

    projects = []
    for li in ul.find_elements_by_tag_name("li"):
        # The title text contains the "Project name" label; drop it.
        name = (li.find_element_by_class_name(
            "pv-accomplishment-entity__title").text.replace(
                "Project name", "").strip())
        dates = get_optional_text(
            li,
            ".pv-accomplishment-entity__date.pv-accomplishment-entity__subtitle",
            is_span=False,
        )
        description = get_description(
            li, ".pv-accomplishment-entity__description.t-14")
        # Remove the label as a *prefix*. str.lstrip(chars) treats its
        # argument as a set of characters, so it would also eat leading
        # letters of the real description (e.g. "created ..." -> "ated ...").
        if description.startswith(description_label):
            description = description[len(description_label):]
        # Drop any whitespace left in front of the actual text.
        description = description.lstrip()
        link = get_accomplishment_link(li)

        projects.append({
            NAME: name,
            DATES: dates,
            DESCRIPTION: description,
            LINK: link
        })

    return projects
Beispiel #5
0
def get_multiple_roles(div, summary):
    """
    Scrape details of multiple roles
    Args:
        div: the div element containing the individual role items
        summary: the summary section, searched for the company name

    Returns:
        A dict mapping NAME to the company text and ENTRIES to a list with
        one dict (TITLE, DATES, LOCATION, DESCRIPTION) per role
    """
    company = get_span_text(summary, '.t-16.t-black.t-bold')

    # Show all roles: click the "see more" toggle when one is present.
    try:
        div.find_element_by_css_selector(
            '.pv-profile-section__see-more-inline.pv-profile-section__text-truncate-toggle.link'
        ).click()
    except NoSuchElementException:
        # No toggle means nothing is collapsed.
        pass
    else:
        # Pause briefly after the click before reading the role items.
        time.sleep(1)

    # find_elements_* returns an empty list when nothing matches; it does not
    # raise NoSuchElementException. Fall back to the alternative class name
    # on an empty result instead of on an exception that never fires.
    role_sections = (
        div.find_elements_by_class_name(
            'pv-entity__position-group-role-item')
        or div.find_elements_by_class_name(
            'pv-entity__position-group-role-item-fading-timeline')
    )

    roles = []
    for role_section in role_sections:
        title = get_span_text(role_section, '.t-14.t-black.t-bold')
        dates = get_span_text(role_section,
                              '.pv-entity__date-range.t-14.t-black.t-normal')
        location = get_optional_text(
            role_section,
            '.pv-entity__location.t-14.t-black--light.t-normal.block')
        description = get_description(
            role_section,
            '.pv-entity__description.t-14.t-black.t-normal.ember-view')

        roles.append({
            TITLE: title,
            DATES: dates,
            LOCATION: location,
            DESCRIPTION: description
        })

    return {NAME: company, ENTRIES: roles}
Beispiel #6
0
def get_education(section):
    """
    Scrape education details, grouped by school
    Args:
        section: the education section

    Returns:
        A list with one dict per school, each holding the school NAME and
        the ENTRIES (DEGREE, LOCATION, DATES, DESCRIPTION) scraped for it
    """

    # The list container is looked up with the '.ember-view' class first and,
    # if that is not found, again without it.
    try:
        container = section.find_element_by_css_selector(
            '.pv-profile-section__section-info.section-info.pv-profile-section__section-info--has-no-more.ember-view'
        )
    except NoSuchElementException:
        container = section.find_element_by_css_selector(
            '.pv-profile-section__section-info.section-info.pv-profile-section__section-info--has-no-more'
        )

    entries_by_school = defaultdict(list)
    for item in container.find_elements_by_tag_name('li'):
        school_name = item.find_element_by_css_selector(
            '.pv-entity__school-name.t-16.t-black.t-bold').text
        degree_text = get_span_text(
            item,
            '.pv-entity__secondary-title.pv-entity__degree-name.pv-entity__secondary-title.t-14.t-black.t-normal'
        )
        period = get_optional_text(
            item, '.pv-entity__dates.t-14.t-black--light.t-normal')
        details = get_description(
            item, '.pv-entity__description.t-14.t-black--light.t-normal.mt4')
        degree = get_degree(item, degree_text)

        # No location is scraped for education, so LOCATION is always ''.
        entry = {
            DEGREE: degree,
            LOCATION: '',
            DATES: period,
            DESCRIPTION: details,
        }
        entries_by_school[school_name].append(entry)

    # Flatten the grouping into a list, in first-seen school order.
    return [
        {NAME: school_name, ENTRIES: entries}
        for school_name, entries in entries_by_school.items()
    ]
Beispiel #7
0
def get_multiple_roles(div, summary):
    """
    Scrape details of multiple roles
    Args:
        div: the div element containing the individual role items
        summary: the summary section, searched for the company name

    Returns:
        A dict mapping NAME to the company text and ENTRIES to a list with
        one dict (TITLE, DATES, LOCATION, DESCRIPTION) per role
    """
    # find_elements_* returns an empty list when nothing matches; it does not
    # raise NoSuchElementException. Fall back to the alternative class name
    # on an empty result instead of on an exception that never fires.
    role_sections = (
        div.find_elements_by_class_name(
            "pv-entity__position-group-role-item")
        or div.find_elements_by_class_name(
            "pv-entity__position-group-role-item-fading-timeline")
    )

    roles = []
    for role_section in role_sections:
        title = get_span_text(role_section, ".t-14.t-black.t-bold")
        dates = get_span_text(role_section,
                              ".pv-entity__date-range.t-14.t-black.t-normal")
        location = get_optional_text(
            role_section,
            ".pv-entity__location.t-14.t-black--light.t-normal.block")
        description = get_description(
            role_section,
            ".pv-entity__description.t-14.t-black.t-normal.ember-view")

        roles.append({
            TITLE: title,
            DATES: dates,
            LOCATION: location,
            DESCRIPTION: description
        })

    # The company name is read from the summary after the roles are collected.
    results = {
        NAME: get_span_text(summary, ".t-16.t-black.t-bold"),
        ENTRIES: roles
    }

    return results
Beispiel #8
0
def get_education(section):
    """
    Scrape education details, grouped by school
    Args:
        section: the education section

    Returns:
        A list with one dict per school, each holding the school NAME and
        the ENTRIES (DEGREE, LOCATION, DATES, DESCRIPTION) scraped for it;
        list items without a degree name are left out
    """
    entries_by_school = defaultdict(list)

    for item in get_section(section).find_elements_by_tag_name('li'):
        school_name = item.find_element_by_css_selector(
            '.pv-entity__school-name.t-16.t-black.t-bold').text
        degree_text = get_span_text(
            item,
            '.pv-entity__secondary-title.pv-entity__degree-name.pv-entity__secondary-title.t-14.t-black.t-normal'
        )
        period = get_optional_text(
            item, '.pv-entity__dates.t-14.t-black--light.t-normal')
        details = get_description(
            item, '.pv-entity__description.t-14.t-black--light.t-normal.mt4')

        # Only items that carry a degree name are recorded.
        if degree_text:
            entries_by_school[school_name].append({
                DEGREE: get_degree(item, degree_text),
                LOCATION: '',
                DATES: period,
                DESCRIPTION: details,
            })

    # Flatten the grouping into a list, in first-seen school order.
    return [
        {NAME: school_name, ENTRIES: entries}
        for school_name, entries in entries_by_school.items()
    ]