def parse_gmail_log_body(dic_my_activity_gmail, gmail_logs):
    """Fill *dic_my_activity_gmail* from the body of one Gmail activity log.

    The body items are obtained from ``TakeoutHtmlParser.find_log_body`` and
    are interpreted by position:

    * item 0 -> ``'type'`` (``'Searched for'`` is stored as ``'Search'``,
      anything else is stored verbatim);
    * item 1 -> ``'keyword_url'`` and ``'keyword'`` when it is an
      ``<a href="...">`` anchor;
    * any later item ending in ``'UTC'`` -> ``'timestamp'``.

    The dictionary is modified in place; nothing is returned.
    """
    body_items = TakeoutHtmlParser.find_log_body(gmail_logs)
    if body_items == []:
        return

    for position, raw_item in enumerate(body_items):
        # Normalise the item to text and replace non-breaking spaces.
        text = str(raw_item).strip().replace(u'\xa0', ' ')

        if position == 0:
            is_search = text == 'Searched for'
            dic_my_activity_gmail['type'] = 'Search' if is_search else text
        elif position == 1:
            if not text.startswith('<a href="'):
                continue
            href_end = text.find('">')
            # 9 == len('<a href="'): the href value starts right after it.
            dic_my_activity_gmail['keyword_url'] = unquote(text[9:href_end])
            anchor_text = text[href_end + 2:text.find('</a>')]
            dic_my_activity_gmail['keyword'] = \
                TakeoutHtmlParser.remove_special_char(anchor_text)
        elif text.endswith('UTC'):
            dic_my_activity_gmail['timestamp'] = \
                TakeoutHtmlParser.convert_datetime_to_unixtime(text)
def parse_voice_audio_log_body(dic_my_activity_voice_audio, voice_audio_logs):
    """Fill *dic_my_activity_voice_audio* from one Voice & Audio log body.

    The body items come from ``TakeoutHtmlParser.find_log_body`` and are
    interpreted by position:

    * item 0 -> ``'type'``. Text starting with ``'Said'`` is stored as
      ``'Search'`` and whatever follows ``'Said'`` becomes ``'keyword'``;
      any other text is stored verbatim as the type.
    * item 1 (only for ``'Search'`` records) -> ``'keyword'`` and
      ``'keyword_url'`` when it is an ``<a href="...">`` anchor.
    * any other item ending in ``'UTC'`` -> ``'timestamp'``.

    The dictionary is modified in place; nothing is returned.
    """
    body_items = TakeoutHtmlParser.find_log_body(voice_audio_logs)
    if body_items == []:
        return

    for position, raw_item in enumerate(body_items):
        text = str(raw_item).strip().replace(u'\xa0', ' ')

        if position == 0:
            if not text.startswith('Said'):
                dic_my_activity_voice_audio['type'] = text
                continue
            dic_my_activity_voice_audio['type'] = 'Search'
            if text != 'Said':
                # Drop the leading 'Said' and the whitespace after it.
                dic_my_activity_voice_audio['keyword'] = text[4:].lstrip()
        elif position == 1 and dic_my_activity_voice_audio['type'] == 'Search':
            if text.startswith('<a href="'):
                href_end = text.find('">')
                anchor_text = text[href_end + 2:text.find('</a>')]
                dic_my_activity_voice_audio['keyword'] = \
                    TakeoutHtmlParser.remove_special_char(anchor_text)
                # 9 == len('<a href="')
                dic_my_activity_voice_audio['keyword_url'] = \
                    unquote(text[9:href_end])
        elif text.endswith('UTC'):
            dic_my_activity_voice_audio['timestamp'] = \
                TakeoutHtmlParser.convert_datetime_to_unixtime(text)
def parse_youtube_log_body(dic_my_activity_youtube, youtube_logs):
    """Fill *dic_my_activity_youtube* from the body of one YouTube log.

    The body items come from ``TakeoutHtmlParser.find_log_body`` and are
    interpreted by position:

    * item 0 -> ``'type'``: ``'Searched for'`` -> ``'Search'``, text starting
      with ``'Watched'`` -> ``'Watch'``, text starting with ``'Visited'`` ->
      ``'Visit'``; anything else is stored verbatim. For Watch/Visit the
      whole text is also stored as ``'keyword'`` when it is at least eight
      characters long and contains a space after its first character.
    * item 1 -> ``'keyword'`` / ``'keyword_url'`` when it is an anchor.
    * later items -> ``'channel_name'`` / ``'channel_url'`` for anchors of
      ``'Watch'`` records, and ``'timestamp'`` for text ending in ``'UTC'``.

    The dictionary is modified in place; nothing is returned.
    """
    body_items = TakeoutHtmlParser.find_log_body(youtube_logs)
    if body_items == []:
        return

    clean = TakeoutHtmlParser.remove_special_char
    verb_to_type = (('Watched', 'Watch'), ('Visited', 'Visit'))

    for position, raw_item in enumerate(body_items):
        text = str(raw_item).strip().replace(u'\xa0', ' ')

        if position == 0:
            if text == 'Searched for':
                dic_my_activity_youtube['type'] = 'Search'
                continue
            matched_type = next(
                (label for verb, label in verb_to_type
                 if text.startswith(verb)),
                None,
            )
            if matched_type is None:
                dic_my_activity_youtube['type'] = text
                continue
            dic_my_activity_youtube['type'] = matched_type
            if len(text) >= 8 and text.find(' ') >= 1:
                dic_my_activity_youtube['keyword'] = clean(text)
        elif position == 1:
            if text.startswith('<a href="'):
                href_end = text.find('">')
                anchor_text = text[href_end + 2:text.find('</a>')]
                dic_my_activity_youtube['keyword'] = clean(anchor_text)
                # 9 == len('<a href="')
                target = unquote(text[9:href_end])
                dic_my_activity_youtube['keyword_url'] = clean(target)
        else:
            if (dic_my_activity_youtube['type'] == 'Watch'
                    and text.startswith('<a href="')):
                href_end = text.find('">')
                anchor_text = text[href_end + 2:text.find('</a>')]
                dic_my_activity_youtube['channel_name'] = clean(anchor_text)
                target = unquote(text[9:href_end])
                dic_my_activity_youtube['channel_url'] = clean(target)
            if text.endswith('UTC'):
                dic_my_activity_youtube['timestamp'] = \
                    TakeoutHtmlParser.convert_datetime_to_unixtime(text)
def parse_analytics_log_body(dic_my_activity_google_analytics, analytics_logs):
    """Fill *dic_my_activity_google_analytics* from one Analytics log body.

    The body items come from ``TakeoutHtmlParser.find_log_body`` and are
    interpreted by position:

    * item 0 -> ``'type'``: ``'Used'`` -> ``'Use'``, ``'Visited'`` ->
      ``'Visit'``, anything else is stored verbatim.
    * item 1 -> ``'keyword'`` and ``'keyword_url'`` when it is an
      ``<a href="...">`` anchor. If the link's query string starts with
      ``q=``, the value of that parameter (up to the first ``&``) replaces
      ``'keyword_url'``. ``'used_device'`` is set to ``'mobile'`` when the
      host of the final URL starts with ``m.``.
    * any later item ending in ``'UTC'`` -> ``'timestamp'``.

    The dictionary is modified in place; nothing is returned.
    """
    body_items = TakeoutHtmlParser.find_log_body(analytics_logs)
    # Explicit comparison (rather than truthiness) keeps the original
    # behaviour for any non-list value the helper might return.
    if body_items == []:
        return

    type_aliases = {'Used': 'Use', 'Visited': 'Visit'}

    for idx, raw_item in enumerate(body_items):
        content = str(raw_item).strip().replace(u'\xa0', ' ')

        if idx == 0:
            dic_my_activity_google_analytics['type'] = \
                type_aliases.get(content, content)
        elif idx == 1:
            if not content.startswith('<a href="'):
                continue
            idx2 = content.find('">')
            keyword = content[idx2 + 2:content.find('</a>')]
            dic_my_activity_google_analytics['keyword'] = \
                TakeoutHtmlParser.remove_special_char(keyword)

            # 9 == len('<a href="')
            url = unquote(content[9:idx2])
            dic_my_activity_google_analytics['keyword_url'] = url

            parsed = urlparse(url)
            if parsed.query.startswith('q='):
                # Take everything after 'q=' up to the first '&'.  The old
                # code sliced with str.find('&'), which is -1 when there is
                # no '&' and therefore silently dropped the last character
                # of the target URL.
                real_url = unquote(parsed.query[2:].partition('&')[0])
                dic_my_activity_google_analytics['keyword_url'] = real_url
                parsed = urlparse(real_url)

            # One check covers both the direct and the redirected URL.
            if parsed.netloc.startswith('m.'):
                dic_my_activity_google_analytics['used_device'] = 'mobile'
        elif content.endswith('UTC'):
            dic_my_activity_google_analytics['timestamp'] = \
                TakeoutHtmlParser.convert_datetime_to_unixtime(content)
def parse_assistant_log_body(dic_my_activity_assistant, assistant_logs):
    """Fill *dic_my_activity_assistant* from one Assistant activity log body.

    The body items come from ``TakeoutHtmlParser.find_log_body`` and are
    interpreted by position:

    * item 0 -> ``'type'`` and, when the text is long enough and contains a
      space after its first character, ``'keyword'``:

      - ``'Said ...'``    -> ``'Search'``, keyword is the text after the
        first space;
      - ``'Used ...'``    -> ``'Use'``, keyword is the text after the first
        space;
      - ``'Trained ...'`` -> ``'Train'``, keyword is the text after the first
        space;
      - ``'Selected ...'`` / ``'Listened ...'`` -> ``'Use'``, keyword is the
        whole text.

      Any other text leaves ``'type'`` untouched.
    * item 1 -> ``'keyword'`` / ``'keyword_url'`` when it is an anchor.
    * later items: text ending in ``'UTC'`` -> ``'timestamp'``; ``'<br/>'``
      is ignored; an anchor sets ``'result'`` / ``'result_url'``; any other
      text is appended to ``'result'``.

    The dictionary is modified in place; nothing is returned.
    """
    body_items = TakeoutHtmlParser.find_log_body(assistant_logs)
    # Explicit comparison (rather than truthiness) keeps the original
    # behaviour for any non-list value the helper might return.
    if body_items == []:
        return

    clean = TakeoutHtmlParser.remove_special_char

    # (prefixes, type label, minimum length, keyword is text after the verb?)
    first_item_rules = (
        (('Said',), 'Search', 5, True),
        (('Used',), 'Use', 5, True),
        (('Trained',), 'Train', 8, True),
        (('Selected', 'Listened'), 'Use', 9, False),
    )

    for idx, raw_item in enumerate(body_items):
        content = str(raw_item).strip().replace(u'\xa0', ' ')

        if idx == 0:
            for prefixes, label, min_len, drop_verb in first_item_rules:
                if not content.startswith(prefixes):
                    continue
                dic_my_activity_assistant['type'] = label
                if len(content) >= min_len and content.find(' ') >= 1:
                    keyword = content.split(' ', 1)[1] if drop_verb else content
                    dic_my_activity_assistant['keyword'] = clean(keyword)
                break
        elif idx == 1:
            if content.startswith('<a href="'):
                idx2 = content.find('">')
                keyword = content[idx2 + 2:content.find('</a>')]
                dic_my_activity_assistant['keyword'] = clean(keyword)
                # 9 == len('<a href="')
                dic_my_activity_assistant['keyword_url'] = \
                    unquote(content[9:idx2])
        elif content.endswith('UTC'):
            dic_my_activity_assistant['timestamp'] = \
                TakeoutHtmlParser.convert_datetime_to_unixtime(content)
        elif content != '<br/>':
            if content.startswith('<a href="'):
                idx2 = content.find('">')
                result_text = content[idx2 + 2:content.find('</a>')]
                dic_my_activity_assistant['result'] = clean(result_text)
                dic_my_activity_assistant['result_url'] = \
                    unquote(content[9:idx2])
            else:
                # Append to whatever result text exists so far.  Using
                # .get() avoids a KeyError when the caller did not
                # pre-populate 'result' and no result anchor came first.
                previous = dic_my_activity_assistant.get('result') or ''
                dic_my_activity_assistant['result'] = previous + clean(content)
def parse_maps_log_body(dic_my_activity_maps, maps_logs):
    """Fill *dic_my_activity_maps* from the body of one Maps activity log.

    The body items come from ``TakeoutHtmlParser.find_log_body`` and are
    interpreted by position:

    * item 0, anchor: ``'keyword'`` is the anchor text (double quotes turned
      into single quotes), ``'type'`` is ``'View'`` when that text starts
      with ``'View'`` and ``'Search'`` otherwise, ``'keyword_url'`` is the
      unquoted link, and coordinates are extracted from the link.
    * item 0, plain text: ``'type'`` is derived from the leading verb
      (``'Searched for'``, ``'Shared'``, ``'Viewed'``, ``'Used Maps'``,
      ``'Answered'``); unknown text is stored verbatim as the type. Some of
      these also store the text as ``'keyword'``.
    * item 1, anchor: ``'keyword'``, ``'keyword_url'`` and coordinates.
    * later items: text ending in ``'UTC'`` -> ``'timestamp'``; item 4 of a
      ``'1 notification'`` record -> ``'keyword'``.

    Coordinates are written to ``'keyword_latitude'`` and
    ``'keyword_longitude'`` and are only stored when both values are present
    in the URL.

    The dictionary is modified in place; nothing is returned.
    """
    anchor_open = '<a href="'
    at_prefix = '/maps/@'
    clean = TakeoutHtmlParser.remove_special_char

    def _store_coordinates(parsed, maps_path_only):
        """Copy a 'lat,lng' pair found in *parsed* into the record.

        *maps_path_only* restricts the '@lat,lng' path form to paths that
        start with '/maps/', as is done for the item-1 link.
        """
        path, query = parsed.path, parsed.query
        if not maps_path_only and path.startswith(at_prefix):
            # Slice the prefix off; str.lstrip() would strip a character
            # set rather than this exact prefix.
            parts = path[len(at_prefix):].split(',')
        elif path.find('@') >= 1 and (
                not maps_path_only or path.startswith('/maps/')):
            parts = path.split('@')[1].split(',')
        elif 'sll=' in query:
            # 'in' also matches a query that *starts* with 'sll='.
            parts = query.split('sll=', 1)[1].split(',')
            if len(parts) >= 2:
                # Cut off any query parameters that follow the longitude.
                parts[1] = parts[1].split('&')[0]
        else:
            return
        # str.split() never returns an empty list, so check for a real
        # pair instead of indexing blindly.
        if len(parts) < 2:
            return
        dic_my_activity_maps['keyword_latitude'] = parts[0]
        dic_my_activity_maps['keyword_longitude'] = parts[1]

    body_items = TakeoutHtmlParser.find_log_body(maps_logs)
    # Explicit comparison (rather than truthiness) keeps the original
    # behaviour for any non-list value the helper might return.
    if body_items == []:
        return

    for idx, raw_item in enumerate(body_items):
        content = str(raw_item).strip().replace(u'\xa0', ' ')

        if idx == 0:
            if content.startswith(anchor_open):
                # 9 == len('<a href="')
                url = content[9:content.find('">')]
                keyword = content.split('>')[1].split('</a')[0]
                dic_my_activity_maps['keyword'] = keyword.replace('"', "'")
                dic_my_activity_maps['type'] = \
                    'View' if keyword.startswith('View') else 'Search'
                url = unquote(url)
                dic_my_activity_maps['keyword_url'] = clean(url)
                _store_coordinates(urlparse(url), maps_path_only=False)
            elif content == 'Searched for':
                dic_my_activity_maps['type'] = 'Search'
            elif content.startswith('Shared'):
                dic_my_activity_maps['type'] = 'Share'
            elif content.startswith('Viewed'):
                dic_my_activity_maps['type'] = 'View'
                if content == 'Viewed For you':
                    dic_my_activity_maps['keyword'] = clean(content)
            elif content == 'Used Maps':
                dic_my_activity_maps['type'] = 'Use'
                dic_my_activity_maps['keyword'] = clean(content)
            elif content.startswith('Answered'):
                dic_my_activity_maps['type'] = 'Answer'
                dic_my_activity_maps['keyword'] = clean(content)
            else:
                dic_my_activity_maps['type'] = content
        elif idx == 1:
            if content.startswith(anchor_open):
                idx2 = content.find('">')
                keyword = content[idx2 + 2:content.find('</a>')]
                dic_my_activity_maps['keyword'] = clean(keyword)
                url = unquote(content[9:idx2])
                dic_my_activity_maps['keyword_url'] = clean(url)
                _store_coordinates(urlparse(url), maps_path_only=True)
        elif content.endswith('UTC'):
            dic_my_activity_maps['timestamp'] = \
                TakeoutHtmlParser.convert_datetime_to_unixtime(content)
        elif idx == 4 and dic_my_activity_maps['type'] == '1 notification':
            dic_my_activity_maps['keyword'] = clean(content)