def parse_friends(user_data: dict) -> dict:
    """
    Goes through the friends data and parses friend statistics

    Reads friends.json, removed_friends.json and
    rejected_friend_requests.json from <cwd>/temp/friends/ and stores in
    user_data:
      - "nbr_of_friends": prettified length of the "friends" list
      - "nbr_of_removed_friends": prettified length of "deleted_friends"
        (0 when removed_friends.json is missing)
      - "nbr_of_rejections": raw length of "rejected_requests"
        (0 when rejected_friend_requests.json is missing)
      - "monthly_friends": running total of new friends, one entry per
        month of every year in user_data["year_list"]
      - "dunbars_number": a list of 150s as long as "monthly_friends"

    A missing friends.json is not caught here.
    Requires user_data["year_list"] to be filled in already.
    """
    friends_dir = getcwd() + "/temp/friends/"
    friends_path = friends_dir + "friends.json"
    removed_friends_path = friends_dir + "removed_friends.json"
    rejected_friends_path = friends_dir + "rejected_friend_requests.json"

    try:
        rejected_requests = utils.json_file_converter(
            rejected_friends_path)["rejected_requests"]
        nbr_of_rejections = len(rejected_requests)
    except FileNotFoundError:
        nbr_of_rejections = 0

    # one month -> count mapping per year, filled by utils.year_init()
    friends_per_month = {}
    for year in user_data["year_list"]:
        friends_per_month[year] = utils.year_init()

    friends = utils.json_file_converter(friends_path)["friends"]
    for new_friend in friends:
        month, year = utils.epoch_to_year_and_month(new_friend["timestamp"])
        friends_per_month[int(year)][month] += 1
    user_data["nbr_of_friends"] = utils.number_prettify(len(friends))

    try:
        removed_friends_list = utils.json_file_converter(removed_friends_path)
        removed_friends_total = len(removed_friends_list["deleted_friends"])
    except FileNotFoundError:
        removed_friends_total = 0
        print("Could not parse removed friend data.")

    # add up the friend changes in every month to have one clean curve
    monthly_friend_list = []
    running_total = 0
    for year_counts in friends_per_month.values():
        for new_friends in year_counts.values():
            running_total += new_friends
            monthly_friend_list.append(running_total)
    print(monthly_friend_list)

    # constant reference line at Dunbar's number, same length as the curve
    dunbar_list = [150] * len(monthly_friend_list)

    user_data["nbr_of_removed_friends"] = utils.number_prettify(
        removed_friends_total)
    user_data["monthly_friends"] = monthly_friend_list
    user_data["dunbars_number"] = dunbar_list
    user_data["nbr_of_rejections"] = nbr_of_rejections
    return user_data
def parse_posts(user_data: dict) -> dict:
    """
    Goes through posts and parses number of posts

    Reads <cwd>/temp/posts/your_posts_1.json and stores in user_data:
      - "nbr_of_posts": prettified number of posts, or "1" when the file
        does not contain a list
      - "monthly_posts": str() of a flat list of post counts, one entry
        per month of every year in user_data["year_list"]

    When the file holds something other than a list, it is treated as a
    single post: it is tallied if it is a dict carrying a "timestamp",
    and "monthly_posts" keeps the same flat-list shape as the normal path.

    Requires user_data["year_list"] to be filled in already.
    """
    posts_per_month = {}
    for year in user_data["year_list"]:
        posts_per_month[year] = utils.year_init()

    posts_path = getcwd() + "/temp/posts/your_posts_1.json"
    post_list = utils.json_file_converter(posts_path)

    has_post_list = isinstance(post_list, list)
    if not has_post_list:
        # only tally the lone entry when it actually looks like a post
        looks_like_post = isinstance(post_list, dict) and "timestamp" in post_list
        post_list = [post_list] if looks_like_post else []

    for post in post_list:
        post_month, post_year = utils.epoch_to_year_and_month(post["timestamp"])
        posts_per_month[int(post_year)][post_month] += 1

    # flatten year -> month -> count into one chronological list
    monthly_post_list = []
    for year_counts in posts_per_month.values():
        monthly_post_list.extend(year_counts.values())

    if has_post_list:
        user_data["nbr_of_posts"] = utils.number_prettify(len(post_list))
    else:
        user_data["nbr_of_posts"] = "1"
    user_data["monthly_posts"] = str(monthly_post_list)
    return user_data
def parse_location_history(user_data: dict) -> dict:
    """
    Goes through the location history and builds one map-marker statement
    per location ping

    Reads <cwd>/temp/location/location_history.json and stores the
    concatenated marker statements in user_data["location_pings"].
    When the file is missing, "location_pings" is set to "nodata".
    """
    locations_path = getcwd() + "/temp/location/location_history.json"
    try:
        location_list = utils.json_file_converter(locations_path)["location_history"]
    except FileNotFoundError:
        user_data["location_pings"] = "nodata"
        return user_data

    # fixed fragments around the coordinates and the popup label
    marker_open = "L.marker(["
    marker_popup = "]).addTo(mymap).bindPopup(\""
    marker_close = "\");\n"

    markers = []
    for ping in location_list:
        latitude = str(ping["coordinate"]["latitude"])
        longitude = str(ping["coordinate"]["longitude"])
        month, year = utils.epoch_to_year_and_month(ping["creation_timestamp"])
        # popup label is "<month> <year>"
        markers.append(
            marker_open + latitude + ", " + longitude
            + marker_popup + month + " " + year + marker_close
        )

    user_data["location_pings"] = "".join(markers)
    return user_data
def parse_off_facebook_activities(user_data: dict) -> dict:
    """
    Goes through the off-Facebook activity and counts the websites and
    applications that hand over data to Facebook

    Reads <cwd>/temp/ads_and_businesses/your_off-facebook_activity.json and
    stores in user_data:
      - "nbr_of_websites_tracking": prettified count of entries whose name
        contains a full stop
      - "nbr_of_applications_tracking": prettified count of all other entries
      - "events_per_tracker": output of html_tracker_list_builder for the
        name -> number-of-events mapping, sorted by descending event count
    """
    activity_path = getcwd() + "/temp/ads_and_businesses/your_off-facebook_activity.json"
    activities = utils.json_file_converter(activity_path)["off_facebook_activity"]

    website_count = 0
    application_count = 0
    event_counts = {}

    for entity in activities:
        # a full stop in the name is taken to mean a website, not an application
        if "." in entity["name"]:
            website_count += 1
        else:
            application_count += 1
        event_counts[entity["name"].encode('latin_1').decode('utf8')] = len(entity["events"])

    # busiest trackers first
    ranked_pairs = sorted(event_counts.items(), key=lambda pair: pair[1], reverse=True)
    events_per_tracker_str = html_tracker_list_builder(dict(ranked_pairs))

    user_data["nbr_of_websites_tracking"] = utils.number_prettify(website_count)
    user_data["nbr_of_applications_tracking"] = utils.number_prettify(application_count)
    user_data["events_per_tracker"] = events_per_tracker_str
    return user_data
def parse_interests(user_data: dict) -> dict:
    """
    Goes through interests and parses number of interest per category

    Reads the user's topics from
    <cwd>/temp/ads_and_businesses/ads_interests.json and the category
    definitions from <cwd>/src/interests.json, then stores in user_data:
      - "interest_categories": list of category names
      - "interest_count": for each category, the percentage (truncated to
        int) of that category's interests found among the user's topics;
        0 for a category that lists no interests
      - "html_interests": output of html_interest_list_builder for the raw
        topics and the category definitions
    """
    interests_path = getcwd() + "/temp/ads_and_businesses/ads_interests.json"
    interest_categories_path = getcwd() + "/src/interests.json"
    user_interests = utils.json_file_converter(interests_path)["topics"]
    interest_categories = utils.json_file_converter(interest_categories_path)

    # Repair the latin-1/UTF-8 mix-up in the topic names. Both the raw and
    # the repaired spelling are kept for matching, so anything that matched
    # on the raw text still does.
    known_interests = set(user_interests)
    known_interests.update(
        interest.encode('latin_1').decode('utf8') for interest in user_interests
    )

    category_list = list(interest_categories.keys())
    interest_category_count = {}
    for category in category_list:
        category_total = 0
        category_matches = 0
        for interest in interest_categories[category]:
            category_total += 1
            if interest in known_interests:
                category_matches += 1
        # standardise results by dividing by total number of interests per
        # category; an empty category scores 0 instead of dividing by zero
        if category_total:
            interest_category_count[category] = int(
                (category_matches / category_total) * 100)
        else:
            interest_category_count[category] = 0

    # transform to list
    interest_count_list = list(interest_category_count.values())
    print("interest_category_count: ", interest_category_count)

    # build a html interest string for the side list of interests
    html_interests = html_interest_list_builder(user_interests, interest_categories)

    user_data["interest_categories"] = category_list
    user_data["interest_count"] = interest_count_list
    user_data["html_interests"] = html_interests
    return user_data
def parse_comments(user_data: dict) -> dict:
    """
    Counts the comments found in the export

    Reads <cwd>/temp/comments/comments.json and stores the prettified
    length of its "comments" list in user_data["nbr_of_comments"].
    """
    comments_file = getcwd() + "/temp/comments/comments.json"
    comments = utils.json_file_converter(comments_file)["comments"]
    user_data["nbr_of_comments"] = utils.number_prettify(len(comments))
    return user_data
def parse_peer_group(user_data: dict) -> dict:
    """
    Reads the friend peer group assigned to the user

    Loads <cwd>/temp/about_you/friend_peer_group.json and stores its
    "friend_peer_group" value, re-encoded from latin-1 to UTF-8, in
    user_data["peer_group"]. When the file is missing, "peer_group" is
    set to "Unknown".
    """
    peer_group_file = getcwd() + "/temp/about_you/friend_peer_group.json"
    try:
        peer_group_info = utils.json_file_converter(peer_group_file)
    except FileNotFoundError:
        user_data["peer_group"] = "Unknown"
    else:
        raw_peer_group = peer_group_info["friend_peer_group"]
        user_data["peer_group"] = raw_peer_group.encode('latin_1').decode('utf8')
    return user_data
def parse_facial_recognition(user_data: dict) -> dict:
    """
    Goes through facial recognition information and parses number of photos
    used for facial_recognition

    Reads <cwd>/temp/about_you/face_recognition.json and stores the
    prettified "facial_data" -> "example_count" value in
    user_data["nbr_of_fr_photos"]. When the file cannot be loaded,
    "nbr_of_fr_photos" is set to 0.
    """
    facial_recognition_path = getcwd(
    ) + "/temp/about_you/face_recognition.json"
    try:
        facial_recognition_info = utils.json_file_converter(
            facial_recognition_path)
    except Exception:
        # Still best-effort for any load failure, but unlike a bare
        # ``except`` this lets KeyboardInterrupt / SystemExit through.
        user_data["nbr_of_fr_photos"] = 0
        return user_data

    nbr_of_fr_photos = facial_recognition_info["facial_data"]["example_count"]
    user_data["nbr_of_fr_photos"] = utils.number_prettify(nbr_of_fr_photos)
    return user_data
def parse_ad_interactions(user_data: dict) -> dict:
    """
    Counts the entries in the ad interaction history

    Reads <cwd>/temp/ads_and_businesses/advertisers_you've_interacted_with.json
    and stores the prettified length of its "history" list in
    user_data["nbr_of_ads_clicked"]. When the file is missing, the value
    is "0".
    """
    interactions_file = (
        getcwd()
        + "/temp/ads_and_businesses/advertisers_you've_interacted_with.json"
    )
    try:
        history = utils.json_file_converter(interactions_file)["history"]
    except FileNotFoundError:
        ads_clicked = "0"
    else:
        ads_clicked = utils.number_prettify(len(history))

    user_data["nbr_of_ads_clicked"] = ads_clicked
    return user_data
def parse_messages(user_data: dict) -> dict:
    """
    Goes through every conversation in the inbox and tallies messages and
    photos

    For each entry of <cwd>/temp/messages/inbox/ the file message_1.json is
    read and, if present, the files in its photos/ directory are counted.
    Stores in user_data:
      - "nbr_of_messages": prettified total number of messages
      - "nbr_of_conversations": prettified number of inbox entries
      - "nbr_of_message_photos": prettified number of files in photos/ dirs
      - "nbr_of_photos": the same photo count, not prettified
      - "yearly_messages": message count per year of user_data["year_list"]
      - "monthly_messages_raw": message count per month, flattened
      - "monthly_messages": "monthly_messages_raw" with every value / 10
      - "monthly_photos": photo count per month, flattened
      - "message_photo_paths": paths of photos whose uri has no "http"

    Requires user_data["year_list"] to be filled in already.
    """
    conversations_directory = getcwd() + "/temp/messages/inbox/"
    conversation_list = listdir(conversations_directory)

    years = user_data["year_list"]
    message_per_year = {year: 0 for year in years}
    message_per_month = {year: utils.year_init() for year in years}
    photos_per_month = {year: utils.year_init() for year in years}

    message_total = 0
    photos_total = 0
    message_photo_paths = []

    for conversation_name in conversation_list:
        print("Parsing conversation: " + conversation_name + "...................", end='\r')
        conversation_dir = conversations_directory + conversation_name

        photos_dir = conversation_dir + "/photos"
        if path.isdir(photos_dir):
            photos_total += len(listdir(photos_dir))

        messages = utils.json_file_converter(conversation_dir + "/message_1.json")["messages"]
        message_total += len(messages)

        for message in messages:
            # drop the last three digits: milliseconds -> seconds
            epoch_seconds = str(message["timestamp_ms"])[:-3]
            sent_month, sent_year = utils.epoch_to_year_and_month(epoch_seconds)
            message_per_year[int(sent_year)] += 1
            message_per_month[int(sent_year)][sent_month] += 1

            for photo in message.get("photos", []):
                # only pictures stored in the export are collected
                # (to check for any nudity in sent/received pictures)
                if "http" not in photo["uri"]:
                    message_photo_paths.append(getcwd()+"/temp/"+photo["uri"])
                taken_month, taken_year = utils.epoch_to_year_and_month(photo["creation_timestamp"])
                photos_per_month[int(taken_year)][taken_month] += 1

    # flatten the per-year tallies into chronological lists
    yearly_message_list = list(message_per_year.values())
    monthly_message_list = [
        count
        for year_counts in message_per_month.values()
        for count in year_counts.values()
    ]
    monthly_photo_list = [
        count
        for year_counts in photos_per_month.values()
        for count in year_counts.values()
    ]

    user_data["nbr_of_messages"] = utils.number_prettify(message_total)
    user_data["nbr_of_conversations"] = utils.number_prettify(len(conversation_list))
    user_data["nbr_of_message_photos"] = utils.number_prettify(photos_total)
    user_data["nbr_of_photos"] = photos_total
    user_data["yearly_messages"] = yearly_message_list
    user_data["monthly_messages_raw"] = monthly_message_list
    user_data["monthly_messages"] = [count / 10 for count in monthly_message_list]
    user_data["monthly_photos"] = monthly_photo_list
    user_data["message_photo_paths"] = message_photo_paths
    return user_data
def parse_user_info(user_data: dict) -> dict:
    """
    Goes through the profile information and parses the basic user details

    Reads <cwd>/temp/profile_information/profile_information.json and
    stores in user_data:
      - "user_name": the profile's full name
      - "join_year": year derived from the registration timestamp
      - "year_list": every year from the join year up to the current year
      - "month_list": "<month> <year>" labels for those years, cut off
        before the current month
      - "relationship_status" / "relationship_timestamp": current
        relationship (with the partner's name appended when present) and
        its year, or "No data" for both when there is no relationship
      - "ex#1", "ex#2", "ex#3": names of the first three previous
        relationships, "None" for missing ones
      - "nbr_of_family_members": number of listed family members, else 0

    All names read from the profile get the same latin-1 -> UTF-8 repair.
    """

    def _fix_encoding(text: str) -> str:
        # Undo the latin-1/UTF-8 mix-up; leave the text untouched when it
        # does not round-trip (e.g. it is already correctly decoded).
        try:
            return text.encode('latin_1').decode('utf8')
        except UnicodeError:
            return text

    json_path = getcwd() + "/temp/profile_information/profile_information.json"
    info = utils.json_file_converter(json_path)
    profile_data = info["profile"]

    user_name = _fix_encoding(profile_data["name"]["full_name"])
    join_year = utils.epoch_to_year(profile_data["registration_timestamp"])

    if "relationship" in profile_data:
        relationship = profile_data["relationship"]
        relationship_status = _fix_encoding(relationship["status"])
        if "partner" in relationship:
            relationship_status += " with " + _fix_encoding(relationship["partner"])
        relationship_timestamp = utils.epoch_to_year(relationship["timestamp"])
    else:
        relationship_status = "No data"
        relationship_timestamp = "No data"

    # read the date once so year and month cannot disagree across midnight
    today = datetime.date.today()

    # instantiate a list of years [1996, 1997, 1998, ... , 2021]
    current_year = today.year
    year_list = list(range(int(join_year), current_year + 1))

    # instantiate a list of months [January 1996, February 1996, March 1996, ... , December 2021]
    month_list = utils.year_init()
    full_month_list = [
        month + " " + str(year) for year in year_list for month in month_list.keys()
    ]

    # remove months in the future
    # NOTE(review): the slice stops before the current month, so the current
    # month is dropped as well - confirm that this is intended.
    current_month = today.strftime('%m')
    current_month_name = utils.number_to_month_name(current_month)
    current_month_index = full_month_list.index(current_month_name + " " + str(current_year))
    full_month_list = full_month_list[:current_month_index]

    # names of up to three previous relationships, "None" where missing
    ex_names = ["None", "None", "None"]
    if "previous_relationships" in profile_data:
        first_three = profile_data["previous_relationships"][:3]
        for slot, previous in enumerate(first_three):
            ex_names[slot] = _fix_encoding(previous["name"])

    # find number of family member connections on Facebook
    if "family_members" in profile_data:
        nbr_of_family_members = len(profile_data["family_members"])
    else:
        nbr_of_family_members = 0

    # fill in the info
    user_data["user_name"] = user_name
    user_data["join_year"] = join_year
    user_data["year_list"] = year_list
    user_data["month_list"] = full_month_list
    user_data["relationship_status"] = relationship_status
    user_data["relationship_timestamp"] = relationship_timestamp
    user_data["ex#1"] = ex_names[0]
    user_data["ex#2"] = ex_names[1]
    user_data["ex#3"] = ex_names[2]
    user_data["nbr_of_family_members"] = nbr_of_family_members
    return user_data