def collect_work_experience(experience_entries):
    """
    Build one XingWorkExperience per entry in ``experience_entries``.

    For every entry the info and description elements are looked up, the
    date is collected with TIME_SELECTOR and the position with
    POSITION_SELECTOR.

    :param experience_entries: iterable of entry elements to process
    :return: list of XingWorkExperience objects, in input order
    """
    collected = []
    for box in experience_entries:
        info_nodes = lookup_element(box, INFO_SELECTOR)
        description_nodes = lookup_element(box, DESCRIPTION_SELECTOR)
        when = collect_element(box, TIME_SELECTOR)
        role = collect_element(box, POSITION_SELECTOR)

        # content_check is asked for item 2 when a date was collected and
        # for item 1 otherwise (presumably the date occupies one of the
        # info elements and shifts the company by one -- confirm against
        # the page layout).
        if when:
            company = content_check(info_nodes, 2)
        else:
            company = content_check(info_nodes, 1)

        summary = content_check(description_nodes, 1)
        collected.append(
            XingWorkExperience(
                position=role,
                companyName=company,
                description=summary,
                date=when,
            )
        )
    return collected
def collect_education(page):
    """
    Extract the education records found on ``page``.

    Every element returned by ``lookup_element(page, EDUCATION_RECORD)`` is
    turned into a LinkedInEducation whose fields are collected with the
    EDUCATION_* selectors.

    NOTE(review): another ``collect_education`` taking a list of entries
    appears later in this source; if both live in the same module the later
    definition shadows this one -- confirm they belong to separate modules.

    :param page: parsed page element to search
    :return: list of LinkedInEducation objects, in document order
    """
    return [
        LinkedInEducation(
            facilityName=collect_element(record, EDUCATION_FACILITY),
            degreeName=collect_element(record, EDUCATION_DEGREE),
            specialtyName=collect_element(record, EDUCATION_SPECIALTY),
            dateRange=collect_element(record, EDUCATION_DATERANGE),
        )
        for record in lookup_element(page, EDUCATION_RECORD)
    ]
def collect_experience(page) -> List[LinkedInWorkExperience]:
    """
    Extract work experience records from the page element.

    Each element matched by EXPERIENCE_RECORD is handled in one of two ways:
    when it contains an EXPERIENCE_INNER_TIMELINE match, the record is
    delegated to ``collect_timeline_experience`` and all of its results are
    added; otherwise a single LinkedInWorkExperience is built from the
    record's own fields.

    :param page: parsed page element to search
    :return: list of LinkedInWorkExperience objects
    """
    collected = []
    for record in lookup_element(page, EXPERIENCE_RECORD):
        timeline = lookup_element(record, EXPERIENCE_INNER_TIMELINE)
        if not timeline:
            # Plain record: all fields are read from the record itself.
            role = collect_element(record, EXPERIENCE_POSITION)
            company = collect_element(record, EXPERIENCE_COMPANY)
            period = collect_element(record, EXPERIENCE_DATERANGE)
            duration = collect_element(record, EXPERIENCE_DURATION)
            place = collect_element(record, EXPERIENCE_LOCATION)
            details = collect_element(record, EXPERIENCE_DESCRIPTION)
            collected.append(
                LinkedInWorkExperience(
                    position=role,
                    companyName=company,
                    dateRange=period,
                    timeWorked=duration,
                    location=place,
                    description=details,
                )
            )
            continue
        # Record with an inner timeline: may yield several experiences.
        collected.extend(collect_timeline_experience(record, timeline))
    return collected
def collect_linked_in_page(html: str, link: str):
    """
    Gather data from a LinkedIn page.

    The HTML is parsed with ``fromstring``; the current position, work
    experiences and education records are collected from the parsed page.
    Name and location are the text content of the first and second element
    matched by NAME_LOCATION_SELECTOR, so fewer than two matches makes the
    indexing fail.

    :param html: raw HTML of the profile page
    :param link: value stored as ``linkedInAccountId``
    :return: a populated LinkedInAccount
    """
    document = fromstring(html)
    header_nodes = lookup_element(document, NAME_LOCATION_SELECTOR)
    position_now = collect_element(document, CURRENT_POSITION)
    work_history = collect_experience(document)
    schooling = collect_education(document)

    person_name = header_nodes[0].text_content()
    person_location = header_nodes[1].text_content()

    # NOTE(review): "locaton" is the keyword exactly as the original call
    # spells it; it presumably matches the LinkedInAccount field name, so do
    # not correct it here without changing the model as well.
    return LinkedInAccount(
        linkedInAccountId=link,
        name=person_name,
        currentPosition=position_now,
        locaton=person_location,
        linkedInWorkExperiences=work_history,
        linkedInEducations=schooling,
    )
def collect_education(educaion_entries):
    """
    Build one XingEducation per entry in ``educaion_entries``.

    The degree is collected with POSITION_SELECTOR and the date with
    TIME_SELECTOR. School name and subject are taken from the entry's info
    elements via ``content_check``.

    NOTE(review): an earlier ``collect_education`` taking a whole page appears
    in this source; if both live in the same module this definition shadows
    it -- confirm they belong to separate modules.

    :param educaion_entries: iterable of education entry elements
    :return: list of XingEducation objects, in input order
    """
    results = []
    for entry in educaion_entries:
        info_nodes = lookup_element(entry, INFO_SELECTOR)
        when = collect_element(entry, TIME_SELECTOR)
        degree = collect_element(entry, POSITION_SELECTOR)

        # With a date present content_check is asked for items 2 and 3,
        # without one for items 1 and 2 (presumably the date shifts the
        # remaining info elements by one -- confirm against the page layout).
        if when:
            school = content_check(info_nodes, 2)
            subject = content_check(info_nodes, 3)
        else:
            school = content_check(info_nodes, 1)
            subject = content_check(info_nodes, 2)

        results.append(
            XingEducation(
                degree=degree,
                schoolName=school,
                subject=subject,
                date=when,
            )
        )
    return results
def collect_tweets(page):
    """
    Collect a Tweet record for every element matched by TWEETS on ``page``.

    For each tweet element the posting datetime, the result of
    ``check_if_retweet``, the comment / retweet / like counters (each passed
    through ``parse_stat_numbers``) and the text are gathered.

    :param page: parsed page element to search
    :return: list of Tweet objects, in document order
    """
    counter_selectors = (
        TWEET_AMOUNT_COMMENTS,
        TWEET_AMOUNT_RETWEETS,
        TWEET_AMOUNT_LIKES,
    )
    records = []
    for node in lookup_element(page, TWEETS):
        posted_at = get_tweet_datetime(node)
        # NOTE(review): the value of check_if_retweet is stored unchanged as
        # isOriginal -- confirm that this polarity is intended.
        original_flag = check_if_retweet(node)
        # Unpacking consumes the generator in order: comments, retweets, likes.
        n_comments, n_retweets, n_likes = (
            parse_stat_numbers(collect_element(node, selector))
            for selector in counter_selectors
        )
        body = collect_element(node, TWEET_TEXT)
        records.append(
            Tweet(
                text=body,
                datetime=posted_at,
                isOriginal=original_flag,
                amountComments=n_comments,
                amountRetweets=n_retweets,
                amountLikes=n_likes,
            )
        )
    return records
def get_tweet_datetime(tweet: tr.Element):
    """
    Read the tweet's timestamp and turn it into a datetime object.

    The "data-time-ms" attribute of the first element matched by
    TWEET_TIMESTAMP is converted to an int, divided by 1000 (milliseconds to
    seconds) and handed to ``dt.fromtimestamp``.

    NOTE(review): presumably ``dt`` is ``datetime.datetime``, in which case
    the result is a naive local-time value -- confirm.

    :param tweet: tweet element to inspect
    :return: the value produced by ``dt.fromtimestamp``
    """
    stamp_nodes = lookup_element(tweet, TWEET_TIMESTAMP)
    millis = int(stamp_nodes[0].get("data-time-ms"))
    return dt.fromtimestamp(millis / 1000)
def check_if_retweet(tweet: tr.Element) -> bool:
    """
    Report whether the TWEET_IS_RETWEET lookup finds anything in ``tweet``.

    NOTE(review): the previous docstring said "Checks if tweet is original"
    while the name suggests the opposite; which one the selector actually
    indicates cannot be told from this function -- confirm.

    :param tweet: tweet element to inspect
    :return: True when the lookup result is truthy, False otherwise
    """
    matches = lookup_element(tweet, TWEET_IS_RETWEET)
    if matches:
        return True
    return False
def collect_tags(inner_element):
    """
    Collect the "haves" and "wants" data found under ``inner_element``.

    The elements matched by HAVES and by WANTS are each passed through
    ``get_content`` and folded with ``compose_list``, starting from an empty
    string.

    :param inner_element: element containing the haves / wants lists
    :return: tuple ``(haves, wants)`` holding the two folded results
    """
    have_contents = (
        get_content(node) for node in lookup_element(inner_element, HAVES)
    )
    want_contents = (
        get_content(node) for node in lookup_element(inner_element, WANTS)
    )
    # Both lookups have already run; the contents themselves are produced
    # lazily while reduce consumes each generator.
    haves_folded = reduce(compose_list, have_contents, "")
    wants_folded = reduce(compose_list, want_contents, "")
    return (haves_folded, wants_folded)