def _parse_joined_events(d: FacebookJson) -> Iterator[AcceptedEvent]:
    """
    Yield an AcceptedEvent for every item in
    d["event_responses"]["events_joined"]
    """
    joined = d["event_responses"]["events_joined"]
    for ev in joined:
        title = ev["name"]
        started = parse_datetime_sec(ev["start_timestamp"])
        ended = parse_datetime_sec(ev["end_timestamp"])
        yield AcceptedEvent(name=title, starts_dt=started, ends_dt=ended)
def _parse_address_book(d: FacebookJson) -> Iterator[Contact]:
    """
    Yield a Contact for every entry in the doubly-nested address book mapping

    Only the first item of each contact's "details" list is used
    for the phone number
    """
    # the outer two levels are only wrappers around the contact lists,
    # so walk through their values without looking at the keys
    contact_lists = (
        contacts for book in d.values() for contacts in book.values()
    )
    for contacts in contact_lists:
        for entry in contacts:
            yield Contact(
                name=entry["name"],
                phone_number=entry["details"][0]["contact_point"],
                created=parse_datetime_sec(entry["created_timestamp"]),
                updated=parse_datetime_sec(entry["updated_timestamp"]),
            )
def _parse_authorized_logins(d: FacebookJson) -> Iterator[AdminAction]:
    """
    Yield an AdminAction for each item in d["recognized_devices"]

    If the device has an "updated_timestamp", it is parsed and attached
    to the metadata as "updated_at"; otherwise the metadata is empty
    """
    for device in d["recognized_devices"]:
        if "updated_timestamp" in device:
            extra = {
                "updated_at": parse_datetime_sec(device["updated_timestamp"])
            }
        else:
            extra = {}
        yield AdminAction(
            description="Known Device: {}".format(device["name"]),
            dt=parse_datetime_sec(device["created_timestamp"]),
            ip=device["ip_address"],
            user_agent=device["user_agent"],
            metadata=extra,
        )
def _parse_messages_in_conversation(
    messages: List[FacebookJson],
) -> Iterator[Res[Message]]:
    """
    Yield a Message for each text message in a conversation

    Skipped silently: "Unsubscribe" messages, messages with "photos" or
    "sticker" keys, and messages that only have sender/timestamp/type keys.
    Anything else that cannot be handled is yielded as a RuntimeError
    instead of being raised
    """
    text_types = ("Generic", "Share")
    media_keys = ("photos", "sticker")
    bare_keys = {"sender_name", "timestamp_ms", "type"}

    for msg in messages:
        # "timestamp_ms" is in milliseconds, the parser expects seconds
        sent_at = parse_datetime_sec(msg["timestamp_ms"] / 1000)
        sender = msg["sender_name"]
        kind = msg["type"]

        if kind == "Unsubscribe":
            continue

        if kind not in text_types:
            yield RuntimeError(
                "Not sure how to parse message for type: {}".format(msg)
            )
            continue

        # media messages aren't interesting in this context, skip them
        if any(key in msg for key in media_keys):
            continue

        if "content" in msg:
            yield Message(
                dt=sent_at,
                author=sender,
                content=msg["content"],
                metadata=msg.get("share"),
            )
            continue

        # message that has no content field at all, nothing to emit
        if set(msg.keys()) <= bare_keys:
            continue

        yield RuntimeError(
            "Not sure how to parse message without 'photos' or 'content': {}".format(
                msg
            )
        )
def _parse_file(histfile: Path) -> Results:
    """
    Parse a timestamped history file into Entry items

    A line like '#1620931766' whose text after the '#' is accepted by
    parse_datetime_sec starts a new entry; all following lines (up to the
    next such line) are the command, which may span multiple lines.
    Lines starting with '#' that fail to parse as a datetime are treated
    as part of the current command.

    Lines seen before the first timestamp are never yielded, since there
    is no datetime to attach to them.
    """
    dt: Optional[datetime] = None
    command_buf = ""  # current command
    # open with a context manager so the file handle is closed once the
    # generator is exhausted (or closed), instead of being left for the
    # garbage collector
    with histfile.open(encoding="latin-1") as hist_f:
        for line in hist_f:
            if line.startswith("#"):
                # parse lines like '#1620931766'
                # possible string datetime
                sdt = line[1:].strip()  # remove newline
                try:
                    newdt = parse_datetime_sec(sdt)
                except Exception as e:
                    # not a datetime line; fall through and treat it
                    # as part of the command
                    logger.debug(f"Error while parsing datetime {e}")
                else:
                    # successfully parsed a datetime line:
                    # yield old data, then use the newly parsed
                    # datetime for the next entry
                    if dt is not None:
                        # rstrip \n gets rid of the last newline for each command
                        yield Entry(dt=dt, command=command_buf.rstrip("\n"))
                    # set new datetime for next entry
                    dt = newdt
                    # overwrite command buffer
                    command_buf = ""
                    continue
            # otherwise, append. this already includes newline
            command_buf += line
    # yield final command
    if dt is not None and command_buf.strip():
        yield Entry(dt=dt, command=command_buf.rstrip("\n"))
def _parse_file(histfile: Path) -> LinearResults:
    """
    Parse a CSV file into LinearResult items

    Each row is expected to hold, in order: a timestamp, an integer
    duration, the application and the window title. Rows the csv module
    rejects with csv.Error are skipped
    """
    with histfile.open("r", encoding="utf-8", newline="") as fh:
        raw = fh.read()
    # drop carriage returns (stray ^M characters) so only unix-style
    # line breaks remain
    unix_text = raw.replace("\r", "")
    rows = csv.reader(
        StringIO(unix_text),
        delimiter=",",
        quotechar='"',
        quoting=csv.QUOTE_MINIMAL,
    )
    while True:
        try:
            fields = next(rows)
            yield LinearResult(
                dt=parse_datetime_sec(fields[0]),
                duration=int(fields[1]),
                application=fields[2],
                window_title=fields[3],
            )
        except StopIteration:
            # ran out of rows
            return
        except csv.Error:
            # some lines contain the NUL byte, which seems to come from
            # encoding errors producing malformed application/file names;
            # ignore those rows and move on to the next one
            continue
def _parse_account_activity(d: FacebookJson) -> Iterator[AdminAction]:
    """
    Yield an AdminAction for each item in d["account_activity"]
    """
    yield from (
        AdminAction(
            description=entry["action"],
            dt=parse_datetime_sec(entry["timestamp"]),
            ip=entry["ip_address"],
            user_agent=entry["user_agent"],
        )
        for entry in d["account_activity"]
    )
def _parse_group_comments(d: FacebookJson) -> Iterator[Comment]:
    """
    Yield a Comment for each item in d["comments"]

    The comment text and group are read from the first item of
    the comment's "data" list
    """
    for item in d["comments"]:
        body = item["data"][0]["comment"]
        yield Comment(
            content=body["comment"],
            action=item["title"],
            dt=parse_datetime_sec(item["timestamp"]),
            metadata=body["group"],
        )
def _parse_json_dump(p: Path) -> Results:
    """
    Load the JSON file at p and yield a "chatlog" Event for each item in it
    """
    blobs = json.loads(p.read_text())
    for item in blobs:
        when = parse_datetime_sec(item["dt"])
        yield Event(
            event_type="chatlog",
            dt=when,
            channel=item["channel"],
            context=item["message"],
        )
def _parse_group_posts(d: FacebookJson) -> Iterator[Union[Comment, Post]]:
    """
    Yield a Comment or Post for every entry in the doubly-nested mapping d

    Entries whose first "data" item has a "comment" key become a Comment,
    everything else is treated as a Post
    """
    entries = (
        entry
        for section in d.values()
        for entry_list in section.values()
        for entry in entry_list
    )
    for entry in entries:
        payload = entry["data"][0]
        if "comment" not in payload.keys():
            yield Post(
                content=payload["post"],
                action=entry["title"],
                dt=parse_datetime_sec(entry["timestamp"]),
            )
        else:
            yield Comment(
                content=payload["comment"]["comment"],
                action=entry["title"],
                dt=parse_datetime_sec(entry["timestamp"]),
                metadata=payload["comment"]["group"],
            )
def _parse_json_file(p: Path) -> Results:
    """
    Load the JSON file at p and yield an Event for each item

    Each item is a pair of (timestamp, (tag, joined metadata)), where the
    joined metadata is a single string that is split on "|"
    """
    for raw_dt, (tag, joined) in json.loads(p.read_text()):
        yield Event(
            dt=parse_datetime_sec(raw_dt),
            event_tag=tag,
            metadata=joined.split("|"),
        )
def _parse_metadata(histline: str) -> Optional[Tuple[datetime, int, str]]:
    """
    extract the date, duration, and command from a line

    returns None if the line does not match PATTERN
    """
    found = PATTERN.match(histline)
    if not found:
        return None
    fields = found.groups()
    when = parse_datetime_sec(fields[0])
    duration = int(fields[1])
    return (when, duration, fields[2])
def _parse_admin_records(d: FacebookJson) -> Iterator[AdminAction]:
    """
    Yield an AdminAction for each item in d["admin_records"]

    The timestamp, IP address and user agent come from the
    record's "session" mapping
    """
    for record in d["admin_records"]:
        session = record["session"]
        event_name = record["event"]
        created = parse_datetime_sec(session["created_timestamp"])
        yield AdminAction(
            description=event_name,
            dt=created,
            ip=session["ip_address"],
            user_agent=session["user_agent"],
        )
def _parse_photo_ips(d: FacebookJson) -> Iterator[UploadedPhoto]:
    """
    Yield an UploadedPhoto for each item in d["photos"] that has an
    upload IP under media_metadata -> photo_metadata -> upload_ip

    Photos missing any part of that path are skipped
    """
    for photo in d["photos"]:
        if "media_metadata" not in photo:
            continue
        media_meta = photo["media_metadata"]
        if "photo_metadata" not in media_meta:
            continue
        photo_meta = media_meta["photo_metadata"]
        if "upload_ip" not in photo_meta:
            continue
        yield UploadedPhoto(
            dt=parse_datetime_sec(photo["creation_timestamp"]),
            ip=photo_meta["upload_ip"],
        )
def _parse_achievement(ach: Dict[str, Any], game_name: str) -> Achievement:
    """
    Build an Achievement for game_name from one achievement mapping

    achieved_on is only parsed (from ach["progress"]["data"]) when the
    achievement is unlocked, otherwise it is None
    """
    progress = ach["progress"]
    unlocked = progress["unlocked"]
    # parsing may raise here; per the original note, the caller
    # is expected to catch that
    unlocked_at = parse_datetime_sec(progress["data"]) if unlocked else None
    return Achievement(
        title=ach["title"],
        description=ach["description"],
        game_name=game_name,
        achieved=unlocked,
        achieved_on=unlocked_at,
        icon=ach.get("icon"),
    )
def _parse_reactions(d: FacebookJson) -> Iterator[Action]:
    """
    Yield an Action for each item in d["reactions"]
    """
    yield from (
        Action(
            description=reaction["title"],
            dt=parse_datetime_sec(reaction["timestamp"]),
        )
        for reaction in d["reactions"]
    )
def _parse_installed_apps(d: FacebookJson) -> Iterator[Action]:
    """
    Yield an "<name> was installed" Action for each item
    in d["installed_apps"]
    """
    for installed in d["installed_apps"]:
        summary = "{} was installed".format(installed["name"])
        added_at = parse_datetime_sec(installed["added_timestamp"])
        yield Action(description=summary, dt=added_at)
def _parse_app_posts(d: FacebookJson) -> Iterator[Action]:
    """
    Yield an Action for each item in d["app_posts"]
    """
    for entry in d["app_posts"]:
        title = entry["title"]
        posted_at = parse_datetime_sec(entry["timestamp"])
        yield Action(description=title, dt=posted_at)
def _parse_deleted_friends(d: FacebookJson) -> Iterator[Friend]:
    """
    Yield a Friend (with added=False) for each item in d["deleted_friends"]
    """
    yield from (
        Friend(
            name=removed["name"],
            dt=parse_datetime_sec(removed["timestamp"]),
            added=False,
        )
        for removed in d["deleted_friends"]
    )
def _parse_post_attachment_item(
    post: FacebookJson, dat: FacebookJson
) -> Iterator[Res[Union[Post, Action]]]:
    """
    Yield the Action (or a RuntimeError) for a single attachment
    data item 'dat' belonging to 'post'
    """
    if "media" in dat:
        mdat = dat["media"]
        # image where I described something
        if "description" in mdat:
            yield Action(
                description=mdat["description"],
                dt=parse_datetime_sec(post["timestamp"]),
                metadata=mdat,
            )
        # image when I just posted to a album
        elif "title" in mdat:
            yield Action(
                description="Posted to Album {}".format(mdat["title"]),
                dt=parse_datetime_sec(post["timestamp"]),
                metadata=mdat,
            )
        else:
            yield RuntimeError(
                "No known way to parse image post {}".format(post)
            )
    elif "place" in dat:
        # check-in into place
        if "name" in dat["place"]:
            yield Action(
                description="Visited {}".format(dat["place"]["name"]),
                dt=parse_datetime_sec(post["timestamp"]),
                metadata=dat,
            )
        else:
            yield RuntimeError(
                "No known way to parse location post {}".format(post)
            )
    elif "life_event" in dat:
        # started high school etc.
        ddat = dat["life_event"]
        yield Action(
            description=ddat["title"],
            dt=parse_datetime_sec(post["timestamp"]),
            metadata=ddat,
        )
    elif "title" in post:
        # third party app event (e.g. Listened to Spotify Song)
        #
        # some of these don't have an external context and instead carry
        # a stringified version of the data under "text"; they are still
        # events, so both shapes are handled the same way
        if "external_context" in dat or "text" in dat:
            yield Action(
                description=post["title"],
                dt=parse_datetime_sec(post["timestamp"]),
                metadata=dat,
            )
        else:
            yield RuntimeError(
                "No known way to parse attachment post with title {}".format(
                    post
                )
            )
    else:
        # unknown data type
        yield RuntimeError(
            "No known way to parse data type with attachment {}".format(post)
        )


def _parse_posts(d: FacebookJson) -> Iterator[Res[Union[Post, Action]]]:
    """
    Yield a Post or Action for each post in d

    d is either the iterable of posts itself, or a dict whose
    "profile_updates" key holds them. Posts that don't match any known
    structure are yielded as a RuntimeError instead of being raised, so
    one unparseable post doesn't stop the rest from being parsed
    """
    all_posts = d
    # handle both profile updates and posts
    if isinstance(all_posts, dict) and "profile_updates" in all_posts:
        all_posts = all_posts["profile_updates"]
    for post in all_posts:
        if "attachments" in post:
            att = post["attachments"]
            # e.g. photo with a description
            # make sure the structure looks like a media post
            if not (
                len(att) >= 1 and "data" in att[0] and len(att[0]["data"]) >= 1
            ):
                yield RuntimeError(
                    "No known way to parse attachment post {}".format(post)
                )
            # make sure each attachment has exactly one data item; an
            # attachment without "data" counts as an unknown structure
            # rather than raising a KeyError
            elif not all(
                "data" in attach and len(attach["data"]) == 1 for attach in att
            ):
                yield RuntimeError(
                    "No known way to parse data from post {}".format(post)
                )
            else:
                for attach in att:
                    yield from _parse_post_attachment_item(
                        post, attach["data"][0]
                    )
        elif "data" in post and len(post["data"]) == 1:
            dat = post["data"][0]
            # basic post I wrote on my timeline
            if "post" in dat and isinstance(dat["post"], str) and "title" in post:
                yield Post(
                    content=dat["post"],
                    dt=parse_datetime_sec(post["timestamp"]),
                    action=post["title"],
                )
            elif "profile_update" in dat:
                yield Action(
                    description="Updated Profile",
                    dt=parse_datetime_sec(post["timestamp"]),
                    metadata=dat["profile_update"],
                )
            else:
                yield RuntimeError(
                    "No known way to parse basic post {}".format(post)
                )
        # post without any actual content
        # (e.g. {'timestamp': 1334515711, 'title': 'Sean Breckenridge posted in club'})
        # treat this as an action since there is no content here
        elif {"timestamp", "title"} == set(post.keys()):
            yield Action(
                description=post["title"],
                dt=parse_datetime_sec(post["timestamp"]),
            )
        else:
            yield RuntimeError("No known way to parse post {}".format(post))
def _parse_group_activity(d: FacebookJson) -> Iterator[Action]:
    """
    Yield an Action for each item in d["groups_joined"]
    """
    for group in d["groups_joined"]:
        title = group["title"]
        joined_at = parse_datetime_sec(group["timestamp"])
        yield Action(description=title, dt=joined_at)
def _parse_search_history(d: FacebookJson) -> Iterator[Search]:
    """
    Yield a Search for each item in d["searches"]

    Each search is asserted to have exactly one "data" item,
    whose "text" is used as the query
    """
    for entry in d["searches"]:
        terms = entry["data"]
        assert len(terms) == 1
        query_text = terms[0]["text"]
        yield Search(query=query_text, dt=parse_datetime_sec(entry["timestamp"]))
def _parse_page_likes(d: FacebookJson) -> Iterator[Action]:
    """
    Yield a "Liked Page <name>" Action for each item in d["page_likes"]
    """
    yield from (
        Action(
            description="Liked Page {}".format(liked["name"]),
            dt=parse_datetime_sec(liked["timestamp"]),
        )
        for liked in d["page_likes"]
    )