Ejemplo n.º 1
0
def collect_work_experience(experience_entries):
    """Build one XingWorkExperience record per entry box.

    Args:
        experience_entries: Iterable of entry boxes. Each box is passed to
            ``lookup_element`` / ``collect_element`` together with a selector.

    Returns:
        A list of ``XingWorkExperience`` objects in iteration order.
    """
    collected = []
    for box in experience_entries:
        info = lookup_element(box, INFO_SELECTOR)
        description_nodes = lookup_element(box, DESCRIPTION_SELECTOR)
        period = collect_element(box, TIME_SELECTOR)
        job_title = collect_element(box, POSITION_SELECTOR)
        # The company is read with index 2 when a date was found and with
        # index 1 otherwise.
        # NOTE(review): presumably the date takes one of the info slots and
        # pushes the company name back by one -- confirm against the markup.
        company_slot = 2 if period else 1
        record = XingWorkExperience(
            position=job_title,
            companyName=content_check(info, company_slot),
            description=content_check(description_nodes, 1),
            date=period)
        collected.append(record)
    return collected
Ejemplo n.º 2
0
def collect_education(page):
    """Extract the education records found on a parsed page.

    Every match of ``EDUCATION_RECORD`` under ``page`` is turned into one
    ``LinkedInEducation`` whose fields are read with ``collect_element``.

    Args:
        page: Parsed page element handed to ``lookup_element``.

    Returns:
        A list of ``LinkedInEducation`` objects, one per matched record.
    """
    return [
        LinkedInEducation(
            facilityName=collect_element(record, EDUCATION_FACILITY),
            degreeName=collect_element(record, EDUCATION_DEGREE),
            specialtyName=collect_element(record, EDUCATION_SPECIALTY),
            dateRange=collect_element(record, EDUCATION_DATERANGE))
        for record in lookup_element(page, EDUCATION_RECORD)
    ]
Ejemplo n.º 3
0
def collect_experience(page) -> List[LinkedInWorkExperience]:
    """Extract work experience records from a parsed page element.

    For each match of ``EXPERIENCE_RECORD`` the function first looks for
    ``EXPERIENCE_INNER_TIMELINE``. When that lookup is truthy the record is
    delegated to ``collect_timeline_experience`` and all results it yields
    are added; otherwise a single ``LinkedInWorkExperience`` is built from
    the record itself.

    Args:
        page: Parsed page element handed to ``lookup_element``.

    Returns:
        The accumulated list of work experience records.
    """
    collected = []
    for record in lookup_element(page, EXPERIENCE_RECORD):
        timeline = lookup_element(record, EXPERIENCE_INNER_TIMELINE)
        if timeline:
            # Record with a nested timeline: the helper produces the entries.
            collected.extend(collect_timeline_experience(record, timeline))
            continue

        # Flat record: read each field directly from the record element.
        job_title = collect_element(record, EXPERIENCE_POSITION)
        company = collect_element(record, EXPERIENCE_COMPANY)
        period = collect_element(record, EXPERIENCE_DATERANGE)
        duration = collect_element(record, EXPERIENCE_DURATION)
        place = collect_element(record, EXPERIENCE_LOCATION)
        summary = collect_element(record, EXPERIENCE_DESCRIPTION)
        collected.append(
            LinkedInWorkExperience(position=job_title,
                                   companyName=company,
                                   dateRange=period,
                                   timeWorked=duration,
                                   location=place,
                                   description=summary))
    return collected
Ejemplo n.º 4
0
def collect_linked_in_page(html: str, link: str):
    """Gather data from a LinkedIn page.

    Args:
        html: Page markup; parsed with ``fromstring``.
        link: Value stored as the account's ``linkedInAccountId``.

    Returns:
        A ``LinkedInAccount`` populated with name, current position,
        location, work experiences and education records.
    """
    tree = fromstring(html)
    header_nodes = lookup_element(tree, NAME_LOCATION_SELECTOR)
    position_now = collect_element(tree, CURRENT_POSITION)
    jobs = collect_experience(tree)
    schools = collect_education(tree)

    # The first two matches of NAME_LOCATION_SELECTOR are used as name and
    # location respectively; both are indexed unconditionally.
    full_name = header_nodes[0].text_content()
    place = header_nodes[1].text_content()

    # NOTE(review): the keyword is spelled ``locaton`` here -- kept as is
    # because the LinkedInAccount definition is not visible; confirm that
    # this matches the field name on the class.
    return LinkedInAccount(
        linkedInAccountId=link,
        name=full_name,
        currentPosition=position_now,
        locaton=place,
        linkedInWorkExperiences=jobs,
        linkedInEducations=schools)
Ejemplo n.º 5
0
def collect_education(educaion_entries):
    """Build one XingEducation record per education entry.

    Args:
        educaion_entries: Iterable of education entry elements. Each one is
            passed to ``lookup_element`` / ``collect_element`` with a
            selector.

    Returns:
        A list of ``XingEducation`` objects in iteration order.
    """
    collected = []
    for entry in educaion_entries:
        info = lookup_element(entry, INFO_SELECTOR)
        period = collect_element(entry, TIME_SELECTOR)
        degree_name = collect_element(entry, POSITION_SELECTOR)
        # With a date present, school and subject are read with indexes 2
        # and 3; without one they are read with indexes 1 and 2.
        # NOTE(review): presumably the date takes one of the info slots --
        # confirm against the page markup.
        school_slot, subject_slot = (2, 3) if period else (1, 2)
        school = content_check(info, school_slot)
        subject_name = content_check(info, subject_slot)
        collected.append(
            XingEducation(degree=degree_name,
                          schoolName=school,
                          subject=subject_name,
                          date=period))
    return collected
Ejemplo n.º 6
0
def collect_tweets(page):
    """Collect tweet data from a parsed page.

    Each match of ``TWEETS`` under ``page`` becomes one ``Tweet`` carrying
    its text, posting datetime, original/retweet flag and the comment,
    retweet and like counters.

    Args:
        page: Parsed page element handed to ``lookup_element``.

    Returns:
        A list of ``Tweet`` objects in document order of the matches.
    """
    collected = []
    for node in lookup_element(page, TWEETS):
        posted_at = get_tweet_datetime(node)
        # NOTE(review): the result of check_if_retweet() is stored as
        # ``isOriginal`` unchanged. If that helper is truthy for retweets,
        # as its name suggests, this flag is inverted -- confirm.
        original_flag = check_if_retweet(node)
        comment_count = parse_stat_numbers(
            collect_element(node, TWEET_AMOUNT_COMMENTS))
        retweet_count = parse_stat_numbers(
            collect_element(node, TWEET_AMOUNT_RETWEETS))
        like_count = parse_stat_numbers(
            collect_element(node, TWEET_AMOUNT_LIKES))
        body = collect_element(node, TWEET_TEXT)
        collected.append(
            Tweet(text=body,
                  datetime=posted_at,
                  isOriginal=original_flag,
                  amountComments=comment_count,
                  amountRetweets=retweet_count,
                  amountLikes=like_count))
    return collected
Ejemplo n.º 7
0
def get_tweet_datetime(tweet: tr.Element):
    """Read a tweet's timestamp and convert it with ``dt.fromtimestamp``.

    The first match of ``TWEET_TIMESTAMP`` supplies a ``data-time-ms``
    attribute, which is parsed as an integer and divided by 1000 before
    conversion.

    Args:
        tweet: Tweet element to inspect.

    Returns:
        The value returned by ``dt.fromtimestamp``.
        NOTE(review): if ``dt`` is ``datetime.datetime`` this is a naive
        local-time datetime -- confirm that is intended.
    """
    stamp_node = lookup_element(tweet, TWEET_TIMESTAMP)[0]
    millis = int(stamp_node.get("data-time-ms"))
    # The attribute holds milliseconds; fromtimestamp() expects seconds.
    return dt.fromtimestamp(millis / 1000)
Ejemplo n.º 8
0
def check_if_retweet(tweet: tr.Element) -> bool:
    """Report whether ``TWEET_IS_RETWEET`` matches anything in the tweet.

    NOTE(review): the function and selector names suggest a truthy result
    means "this is a retweet", while the previous docstring described it as
    an "is original" check -- confirm which meaning callers rely on.

    Args:
        tweet: Tweet element to inspect.

    Returns:
        ``True`` when the lookup result is truthy, ``False`` otherwise.
    """
    matches = lookup_element(tweet, TWEET_IS_RETWEET)
    return bool(matches)
Ejemplo n.º 9
0
def collect_tags(inner_element):
    """Collect the "haves" and "wants" lists of an element.

    The matches of ``HAVES`` and ``WANTS`` are each mapped through
    ``get_content`` and folded with ``compose_list``, starting from an
    empty string.

    Args:
        inner_element: Element handed to ``lookup_element``.

    Returns:
        A 2-tuple ``(haves, wants)`` holding the two folded results.
    """
    # Both lookups run up front; the mapped values are consumed lazily by
    # the reduce() calls below.
    have_items = map(get_content, lookup_element(inner_element, HAVES))
    want_items = map(get_content, lookup_element(inner_element, WANTS))
    haves_folded = reduce(compose_list, have_items, "")
    wants_folded = reduce(compose_list, want_items, "")
    return (haves_folded, wants_folded)