def get_videos(self):
    """Collect cached video entries and their Last-Modified timestamps.

    Reads the ``video_http_header_t`` table from ``video.db``, registers
    each cached video file with the media tracker, and adds a timeline
    event for every video whose stored HTTP response headers carry a
    ``Last-Modified`` date.

    Returns:
        list[dict]: one dict per video with ``key`` and, when available,
        ``last_modified``.
    """
    logging.info("Getting Videos...")
    db_path = os.path.join(self.internal_cache_path, "databases", "video.db")
    rows = Database(db_path).execute_query(
        "select key, extra from video_http_header_t")
    videos = []
    for cache_key, extra_json in rows:
        video = {"key": cache_key}
        header_dump = json.loads(extra_json)
        for header_line in header_dump["responseHeaders"].splitlines():
            if 'Last-Modified:' not in header_line:
                continue
            # Header value looks like "Last-Modified: Mon, 01 Jan 2020 ..."
            video["last_modified"] = Utils.date_parser(
                header_line.split(": ")[1], "%a, %d %b %Y %H:%M:%S %Z")
            self.timeline.add(video["last_modified"], "video",
                              {"video": video["key"]})
            break
        self.media.add(
            os.path.join("internal", "cache", "cache", video["key"]))
        videos.append(video)
    logging.info("{} video(s) found".format(len(videos)))
    return videos
def get_credit_cards(self):
    """Extract autofill credit-card records from the Chromium 'Web Data' DB.

    Card numbers remain in their encrypted form as stored by Chromium.
    Every card's last use date is also pushed to the timeline.

    Returns:
        list[dict]: one dict per stored card.
    """
    logging.info("Getting User credit cards...")
    db_path = os.path.join(self.internal_cache_path, "app_webview",
                           "Default", "Web Data")
    rows = Database(db_path).execute_query(
        "select name_on_card, expiration_month, expiration_year, card_number_encrypted, date_modified, origin, use_count, use_date from credit_cards;"
    )
    cards_list = []
    for row in rows:
        card = {
            "name": row[0],
            "expiration_date": "{}/{}".format(row[1], row[2]),
            "card_number_encrypted": str(row[3]),
            "date_modified": str(row[4]),
            "origin": str(row[5]),
            "use_count": row[6],
            "use_date": str(row[7]),
        }
        self.timeline.add(card["use_date"], "AF_creditcard",
                          {"name": card["name"]})
        cards_list.append(card)
    logging.info("{} credit cards found".format(len(cards_list)))
    return cards_list
def get_bio_changes(self):
    """Return the user's biography edit history from tinder-3.db.

    Each entry holds the previous bio text, the new bio text and the
    change timestamp; every change is also pushed to the timeline.

    Returns:
        list[dict]: one dict per biography change, ordered by timestamp.
    """
    logging.info("Get Biography Changes...")
    db = os.path.join(self.internal_cache_path, "databases", "tinder-3.db")
    database = Database(db)
    biography_changes = []
    bio_list = database.execute_query(
        "select old_bio, bio, timestamp from profile_change_bio order by timestamp"
    )
    for entry in bio_list:
        bio_change = {}
        bio_change["old"] = entry[0]
        bio_change["new"] = entry[1]
        # BUG FIX: the query selects exactly 3 columns, so the timestamp
        # is at index 2 — `entry[3]` raised IndexError on every row.
        bio_change["createdtime"] = entry[2]
        timeline_event = {}
        timeline_event["old"] = bio_change["old"]
        timeline_event["new"] = bio_change["new"]
        self.timeline.add(bio_change["createdtime"], "AF_user",
                          timeline_event)
        biography_changes.append(bio_change)
    logging.info("{} biography change(s) found".format(
        len(biography_changes)))
    return biography_changes
def get_user_matches(self):
    """Extract the user's matches from tinder-3.db's ``match_view``.

    Each match's creation date is also pushed to the timeline as an
    ``AF_relation`` event.

    Returns:
        list[dict]: one dict per match.
    """
    logging.info("Getting User Matches...")
    db_path = os.path.join(self.internal_cache_path, "databases",
                           "tinder-3.db")
    rows = Database(db_path).execute_query(
        "select match_id, match_creation_date/1000 , match_last_activity_date, match_person_id, match_person_name, match_person_bio, match_person_birth_date/1000, case when match_is_blocked = 1 then 'Blocked' when match_is_blocked = 0 then 'Not Blocked ' else 'Invalid' end from match_view;"
    )
    matches = []
    for row in rows:
        match = {
            "id": row[0],
            "creation_date": row[1],
            "last_activity_date": row[2],
            "person_id": row[3],
            "person_name": row[4],
            "person_bio": row[5],
            # key name kept as-is (typo included) for caller compatibility
            "person_bithdate": row[6],
            "is_blocked": row[7],
        }
        matches.append(match)
        self.timeline.add(match["creation_date"], "AF_relation", {
            "person_id": match["person_id"],
            "person_name": match["person_name"],
            "is_blocked": match["is_blocked"],
        })
    logging.info("{} matches found".format(len(matches)))
    return matches
def get_user_messages(self):
    """Extract chat messages from tinder-3.db's ``message_view``.

    Messages are returned in send-date order; each one is also pushed to
    the timeline as an ``AF_message`` event.

    Returns:
        list[dict]: one dict per message.
    """
    logging.info("Getting User Messages...")
    db_path = os.path.join(self.internal_cache_path, "databases",
                           "tinder-3.db")
    rows = Database(db_path).execute_query(
        "select message_to_id, message_from_id , message_text, message_sent_date/1000 as message_sent_date, case when message_is_liked = 0 then 'Not liked' when message_is_liked = 1 then 'Liked' else message_is_liked end, case when message_is_seen = 0 then 'Not seen' when message_is_seen = 1 then 'Seen' else message_is_seen end, message_delivery_status from message_view order by message_sent_date;"
    )
    messages_list = []
    # One row per message from every conversation
    for row in rows:
        message = {
            "to": row[0],
            "from": row[1],
            "message": row[2],
            "created_time": row[3],
            "is_liked": str(row[4]),
            "is_seen": str(row[5]),
            "delivery_status": str(row[6]).lower(),
        }
        messages_list.append(message)
        self.timeline.add(message["created_time"], "AF_message", {
            "from": message["from"],
            "to": message["to"],
            "message": message["message"],
        })
    logging.info("{} messages found".format(len(messages_list)))
    return messages_list
def get_user_uniqueid_by_id(self, uid):
    """Resolve a numeric user id to its UNIQUE_ID handle.

    Looks the uid up in db_im_xx's SIMPLE_USER table.

    Returns:
        The UNIQUE_ID string, or None when the uid is unknown.
    """
    db_path = os.path.join(self.internal_cache_path, "databases", "db_im_xx")
    rows = Database(db_path).execute_query(
        "select UNIQUE_ID from SIMPLE_USER where uid={}".format(uid))
    return rows[0][0] if rows else None
def get_last_session(self):
    """Reconstruct the last application session from ss_app_log.db.

    Returns one dict per logged event (action/tag, parsed timestamp,
    session id, and the forensically relevant subset of the event's JSON
    body); every event is also pushed to the timeline.

    Returns:
        list[dict]: session events ordered by timestamp.
    """
    logging.info("Getting last session...")
    # FIX: de-duplicated ("is_first" and "duration" were each listed
    # twice) and stored as a frozenset for O(1) membership tests.
    relevant_keys = frozenset([
        "device", "name", "status", "ab_sdk_version",
        "storage_available_internal_size",
        "storage_available_external_size", "app_storage_size", "brand",
        "page", "request_method", "is_first", "duration", "rip",
        "author_id", "access2", "video_duration", "video_quality",
        "access", "page_uid", "previous_page", "enter_method",
        "enter_page", "key_word", "search_keyword", "next_tab",
        "search_type", "play_duration", "content", "manufacturer",
        "os_version"
    ])
    session = []
    db = os.path.join(self.internal_cache_path, "databases",
                      "ss_app_log.db")
    database = Database(db)
    results = database.execute_query(
        "select tag, ext_json, datetime(timestamp/1000, 'unixepoch', 'localtime'), session_id from event order by timestamp"
    )
    for entry in results:
        session_entry = {}
        session_entry["action"] = entry[0]
        body_dump = json.loads(entry[1])
        session_entry["time"] = Utils.date_parser(entry[2],
                                                  "%Y-%m-%d %H:%M:%S")
        session_entry["session_id"] = entry[3]
        timeline_event = {"action": session_entry["action"]}
        self.timeline.add(session_entry["time"], "AF_system",
                          timeline_event)
        session.append(session_entry)
        # Keep only the relevant subset of the JSON event body.
        session_entry["body"] = {
            key: value
            for key, value in body_dump.items() if key in relevant_keys
        }
    # FIX: log message typo "entrys" -> "entries".
    logging.info("{} entries found".format(len(results)))
    return session
def get_user_photos(self):
    """List the user's own profile photo URIs and register them as media.

    Returns:
        list: image URIs from tinder-3.db's ``profile_media`` table.
    """
    logging.info("Get User Photos...")
    # BUG FIX: the path was passed as UTF-8 *bytes* via `.encode('utf-8')`;
    # every other query in this module passes a str path, and sqlite3
    # expects str/PathLike, so the encode call was removed.
    db = os.path.join(self.internal_cache_path, "databases", "tinder-3.db")
    database = Database(db)
    photos_list = database.execute_query(
        "select image_uri from profile_media;")
    user_photos = []
    for photo in photos_list:
        user_photos.append(photo[0])
        self.media.add(photo[0], True)
    logging.info("{} photo(s) found".format(len(photos_list)))
    return user_photos
def get_open_events(self):
    """Return application-open timestamps (epoch seconds) from TIKTOK.db.

    Each open event is also pushed to the timeline as an ``AF_system``
    "Open Application" entry.
    """
    logging.info("Get application open events...")
    db_path = os.path.join(self.internal_cache_path, "databases",
                           "TIKTOK.db")
    rows = Database(db_path).execute_query(
        "select open_time/1000 from app_open;")
    open_events = []
    for row in rows:
        open_events.append(row[0])
        self.timeline.add(row[0], "AF_system",
                          {"event": "Open Application"})
    return open_events
def get_locations(self):
    """Extract location fixes from legacy_tinder-1.db.

    Each fix is pushed both to the timeline (``AF_location``) and to the
    locations layer.

    Returns:
        list[dict]: one dict per row of ``tinder_locations``.
    """
    logging.info("Getting User locations...")
    db_path = os.path.join(self.internal_cache_path, "databases",
                           "legacy_tinder-1.db")
    rows = Database(db_path).execute_query(
        "select latitude, longitude, state_province_long, country_short_name, country_long_name, address,route,street_number,city, last_seen_date/1000 as last_seen_date from tinder_locations;"
    )
    locations_list = []
    for row in rows:
        location = {
            "latitude": row[0],
            "longitude": row[1],
            "province": row[2],
            "country_short": row[3],
            "country_long": row[4],
            "address": row[5],
            "route": row[6],
            "street_number": str(row[7]),
            "city": row[8],
            "last_seen_date": row[9],
        }
        locations_list.append(location)
        self.timeline.add(location["last_seen_date"], "AF_location", {
            "latitude": location["latitude"],
            "longitude": location["longitude"],
            "address": location["address"],
            "city": location["city"],
            "country_long": location["country_long"],
        })
        self.locations.add(location["last_seen_date"],
                           location["latitude"], location["longitude"])
    logging.info("{} locations found".format(len(locations_list)))
    return locations_list
def get_user_profiles(self):
    """Build a dict of known user profiles from db_im_xx's SIMPLE_USER.

    Returns:
        dict: profiles keyed by the user's UNIQUE_ID handle.
    """
    logging.info("Getting User Profiles...")
    db_path = os.path.join(self.internal_cache_path, "databases",
                           "db_im_xx")
    rows = Database(db_path).execute_query(
        "select UID, UNIQUE_ID, NICK_NAME, AVATAR_THUMB, case when follow_status = 1 then 'Following' when follow_status = 2 then 'Followed and Following ' else 'Invalid' end from SIMPLE_USER")
    profiles = {}
    for uid, unique_id, nick_name, avatar_json, follow_status in rows:
        # AVATAR_THUMB holds a JSON blob with a "url_list" array
        profile = {
            "uid": uid,
            "uniqueid": unique_id,
            "nickname": nick_name,
            "avatar": json.loads(avatar_json)["url_list"][0],
            "follow_status": follow_status,
            "url": "https://www.tiktok.com/@{}".format(unique_id),
        }
        profiles[unique_id] = profile
    logging.info("{} profiles found".format(len(profiles.keys())))
    return profiles
if __name__ == '__main__': """ Initialization of program """ args = parse_arguments() if args.debug: log.basicConfig(level=log.DEBUG) elif args.verbose: log.basicConfig(level=log.INFO) try: confDatabase = pickle.load(open(Glob.confDbFile, 'rb')) while 1: header() db = Database(log, confDatabase) if not db.error: choice = main() if choice == "": break elif choice == "1": substitute_products(list_products(list_categories())) elif choice == "2": list_backup() elif choice == "3": del_backup() db.close() else: print( "\n*** Erreur information de connection base de données ***\n" )
def run():
    """Interactively parse exported VK dialogue HTML pages into one log file.

    Prompts for a dialogue folder id and a page range, scrapes every
    message (author, date, text, attachments) with BeautifulSoup, writes
    them as a single HTML log under ``output/<dialogue>/`` and records
    users and dialogue statistics in a SQLite database.
    """
    # Ask for the dialogue folder until a valid one is given.
    while True:
        try:
            name_folder = input("ID диалога: ")
            folder_path = os.path.join("messages", name_folder)
            file_list = os.listdir(folder_path)
            # Sort "file_list" by the number in the file name, e.g. "messages50.html"
            file_list = sorted(file_list,
                               key=lambda number: int(number[8:-5]))
            break
        except OSError:
            print("Неверный путь к папке.")
        except ValueError:
            print("Неверный ввод.")

    # Helper variables for working with files and pages
    len_file_list = len(file_list)
    message_order = None
    start_page = None
    end_page = None
    start = None
    end = None

    # Ask the user which page range to parse (inclusive, 1-based)
    while True:
        try:
            print(f"Всего страниц: {len_file_list}")
            start_page = int(input("Первая страница (например — 1): "))
            end_page = int(
                input(f"Последняя страница (например — {len_file_list}): "))
            print()
        except ValueError:
            print("Неверный ввод.")
            continue
        if start_page > len_file_list or end_page > len_file_list or end_page < 1 or start_page < 1:
            print("Неверное количество.")
            continue
        break

    time_start = time.time()  # Start the timer

    # Choose ascending or descending parse order depending on the chosen
    # pages; `start`/`end` become slice bounds into `file_list`.
    if start_page <= end_page:
        message_order = 1
        start = start_page - 1
        end = end_page
    elif start_page > end_page:
        file_list.reverse()
        message_order = -1
        start = len_file_list - start_page
        end = len_file_list - end_page + 1

    file_name = f"logs_{start_page}-{end_page}.html"  # Output file name

    # Make sure the "output" folder exists
    output_folder = "output"
    current_folder = os.path.join(os.getcwd())
    if output_folder not in os.listdir(current_folder):
        os.mkdir(output_folder)

    # Make sure the per-dialogue subfolder exists inside "output"
    if name_folder not in os.listdir(
            os.path.join(current_folder, output_folder)):
        os.mkdir(os.path.join(current_folder, output_folder, name_folder))

    # log_output = open(f"{os.path.join(output, file_name)}", 'w', encoding='utf-8')
    log_output = open(
        f"{os.path.join(current_folder, output_folder, name_folder, file_name)}",
        'w',
        encoding='utf-8')

    # Progress counter (files processed so far)
    progress_percent = 0

    # New users that are not in the DB yet; they will be inserted at the end
    new_users = []

    # Iterate over every file in the selected range
    for file in file_list[start:end]:
        full_path = os.path.join(folder_path, file)
        with open(
                full_path, "rb"
        ) as opened_file:  # Must be rb — VK export files appear to be ANSI-encoded
            raw_html = opened_file.read()
        html = BeautifulSoup(raw_html, "html.parser")
        messages = html.find_all("div", class_="message")

        # Iterate over every message in the file
        for message in messages[::message_order]:
            # Header text is "<name>, <date>"; split it with regexes
            user_name = message.find(
                "div", class_="message__header").get_text().strip()
            message_date = re.findall(r",\s.*", user_name)[0].strip(', ')
            user_name = re.findall(r".+,", user_name)[0].strip(",")

            # If the message was edited, "(ред.)" is appended at its very end
            edited = ""
            if re.findall(r"(ред\.)", message_date):
                message_date = message_date.replace("(ред.)", "").strip()
                edited = "(ред.)"

            # Get the link to the author's VK profile.
            # No link means it is the owner's own profile, so link to self.
            if user_name == "Вы":
                user_href = "https://vk.com/id0"
            else:
                user_href = message.a.get("href")

            # If the current user is not collected yet, remember them for
            # the bulk DB insert at the end
            user_data = (user_name, user_href)
            if user_data not in new_users:
                new_users.append(user_data)

            # Raw ("dirty") message body, still containing html markup
            user_message = message.div.find_next("div")

            # Conversation service action, e.g. pinning a message,
            # updating the photo, kicking a member, etc.
            try:
                sub_user_message = message.find("div", class_="kludges")
                dialogue_action = sub_user_message.find(
                    "a", class_="im_srv_lnk")
                if dialogue_action:
                    dialogue_action = sub_user_message.get_text().strip()
                    dialogue_action = f"({dialogue_action})"
                else:
                    dialogue_action = ""
            except AttributeError:
                dialogue_action = ""
                sub_user_message = ""

            # Strip redundant html fragments from the message
            user_message = str(user_message)
            user_message = user_message.replace(str(sub_user_message), "")
            user_message = user_message[5:-6]
            user_message = user_message.replace("<br/>", " ")
            user_message = user_message.replace(
                "amp;", "")  # Remove the duplicated escape (for links)

            link_keys = []
            description_values = []
            other_descriptions = []
            attachment_description = message.find_all(
                "div", class_="attachment__description")
            attachment_link = message.find_all("a",
                                               class_="attachment__link")
            all_links = []
            for link in attachment_link:
                current_link = f'{link.get("href").strip()}'
                all_links.append(current_link)
            link_types = ["Фотография", "Видеозапись", "Документ", "Ссылка"]

            # Collect every attachment type that is not in "link_types"
            if attachment_description:
                for description in attachment_description:
                    current_description = f"{description.get_text().strip()}"
                    if current_description not in link_types:
                        other_descriptions.append(current_description)

            # Documents, voice messages, videos and photos — all in order!
            # Regular-expression search patterns
            pattern_video_url = r"https?://?vk\.com/video\S+"
            pattern_document_url = r"https?://?vk\.com/doc[^s]\S+"
            pattern_audio_message_url = r"https?://?cs[0-9]+\.userapi\.com//?u[0-9]+/audiomsg/.+/.+\.ogg"
            pattern_audio_url = r"https?://?vk\.com/audio\S+"
            pattern_photo_url_1 = r"https?://?sun[0-9]+-[0-9]+\.userapi\.com/\S+\.jpg"
            pattern_photo_url_2 = r"https?://?vk\.com/im\?sel=[0-9]+&z=photo[0-9]+_[0-9]*%[0-9]+Fmail[0-9]+"
            pattern_photo_url_3 = r"https?://?vk\.com/photo\S+"
            pattern_photo_url_4 = r"https?://?pp\.userapi\.com\S+\.jpg"
            pattern_other_url = r"https?://?[^\"\s<>]+"

            # All links found in the message text itself
            found_other_urls = re.findall(pattern_other_url, user_message)

            # Skip photos matching "pattern_photo_url_4" here because
            # they duplicate "pattern_photo_url_1"
            if found_other_urls and not re.findall(pattern_photo_url_4,
                                                   user_message):
                for current_url in found_other_urls:
                    all_links.append(current_url)

            url_pattern_types = OrderedDict({
                pattern_document_url: "Документ",
                pattern_audio_message_url: "Голосовое сообщение",
                pattern_photo_url_1: "Фотография",
                pattern_photo_url_2: "Фотография",
                pattern_photo_url_3: "Фотография",
                pattern_photo_url_4: "Фотография",
                pattern_video_url: "Видеозапись",
                pattern_audio_url: "Аудиозапись",
                # "pattern_other_url" must always come last!!!
                pattern_other_url: "Ссылка",
            })

            # Match a link against the ordered {pattern: type} mapping;
            # returns the type name, or False for a generic link.
            def get_link_and_type(checking_link, url_pattern_type_list):
                for current_pattern, current_type in url_pattern_type_list.items(
                ):
                    if re.findall(
                            current_pattern,
                            checking_link) and current_type != "Ссылка":
                        return current_type
                    if current_type == "Ссылка":
                        return False

            for current_link in all_links:
                is_other_url = get_link_and_type(current_link,
                                                 url_pattern_types)
                if is_other_url:
                    description_values.append(is_other_url)
                else:
                    description_values.append("Ссылка")
                link_keys.append(current_link)

            # Join link keys with their link-type values
            attachments = dict(zip(link_keys, description_values))

            # Strip trailing "#" and "/" from link keys so only unique
            # links remain; every found link is removed from the message
            # itself and moved into the {link: type} dict.
            for current_link, current_description in attachments.copy(
            ).items():
                if current_link in user_message:
                    user_message = user_message.replace(current_link, "")
                attachments.pop(current_link)
                cleaned_current_link = current_link.rstrip("#").rstrip("/")
                attachments[cleaned_current_link] = current_description

            # Remove every "pattern_photo_url_4" match from the message
            for current_link in re.findall(pattern_photo_url_4,
                                           user_message):
                user_message = user_message.replace(current_link, "")

            # Build the list of links shown to the right of the parsed
            # message; if there are none, an empty string is used.
            output_links = ""
            if attachments:
                output_links = []
                for current_link in attachments.keys():
                    output_links.append(
                        f'<a href="{current_link}">{current_link}</a>')

            descriptions_and_sum = {}
            descriptions_and_sum_list = []
            output_other_descriptions = []
            if output_links:
                output_links = f"({', '.join(output_links)})"
                attachments_list = list(attachments.values())

                # Count unique link types
                for current_description in attachments.values():
                    count_current_description = attachments_list.count(
                        current_description)
                    descriptions_and_sum[
                        current_description] = f"{count_current_description}"

                # Render each link type together with its count
                for current_description, count_current_description in descriptions_and_sum.items(
                ):
                    descriptions_and_sum_list.append(
                        f"({current_description}, {count_current_description} шт.)"
                    )

            # For "(Аудиозапись)" make sure the type is not repeated in
            # "other_descriptions"; otherwise the message type may be a
            # sticker, wall post, pinned or deleted message
            for description in other_descriptions:
                output_description = f"({description}, {other_descriptions.count(description)} шт.)"
                if description == "Аудиозапись" and output_description not in output_other_descriptions:
                    output_other_descriptions.append(output_description)
                elif description != "Аудиозапись":
                    output_other_descriptions.append(f"({description})")

            descriptions_and_sum_list = " ".join(descriptions_and_sum_list)
            output_other_descriptions = " ".join(output_other_descriptions)

            # Escape the remaining html tags so the content is not treated
            # as html code.
            # NOTE(review): as extracted, this replace is a no-op; it
            # looks like "&lt;"/"&gt;" replacements were unescaped when
            # this file was exported — confirm against the original.
            user_message = user_message.replace("<", "<").replace(">", ">")

            # Append one record line to "log_output"
            log_output.writelines(
                f"<a href='{user_href}'>{user_name}</a> — {dialogue_action}{user_message} {descriptions_and_sum_list} {output_links} {output_other_descriptions} ({message_date}) {edited}</br>"
            )

        progress_percent += 1
        print("Выполнено {0:.2f}%".format(
            progress_percent / (abs(end_page - start_page) + 1) * 100))

    print(
        f'\nГотово. Файл "{file_name}" лежит в папке "{os.path.join(output_folder, name_folder)}" рядом с этой программой.\n'
    )
    log_output.close()

    # Stop the timer and compute elapsed time
    time_end = time.time()
    spent_time = round(time_end - time_start, 2)
    current_datetime = datetime.datetime.utcnow()
    current_datetime = current_datetime.strftime("%d/%m/%y %H:%M:%S")

    name_db = "database.db"
    db_folder = os.path.join(os.getcwd(), "db")
    current_directory_list = os.listdir(db_folder)
    path_db = os.path.join(db_folder, name_db)
    db = Database(path_db,
                  name_folder)  # Missing DB is created at the relative path

    # Read the schema code and hand it to the DB for creation
    if name_db not in current_directory_list:
        schema_path = os.path.join(db_folder, "schema.sql")
        with open(schema_path, 'r') as schema:
            loaded_schema = schema.read()
        db.create_database(loaded_schema)

    # Fetch users already stored in the DB
    old_users = db.get_users()

    # Insert every new user not yet in the DB
    new_unique_users = set(new_users) - set(old_users)
    db.add_users(new_unique_users)

    # Look up the current dialogue in the DB
    dialogue_existence = db.get_dialogue()

    # If the current dialogue already exists, refresh its last-processed
    # date/time; otherwise insert it.
    if dialogue_existence:
        db.update_dialogue(current_datetime, spent_time)
    elif not dialogue_existence:
        db.add_dialogue(current_datetime, spent_time)
    db.connection.commit()
    db.connection.close()
    print(f"Время выполнения программы в секундах: {spent_time}.\n")
def get_sqlparse(self):
    """Run the DRP/sqlparse extraction over this app's databases.

    Returns:
        Whatever ``Database.get_drp_output`` produces for the report.
    """
    logging.info("Getting sqlparse...")
    drp_output = Database.get_drp_output(self.databases, self.report_path)
    return drp_output
return entry if __name__ == '__main__': """ Initialization of program """ args = parse_arguments() if args.debug: log.basicConfig(level=log.DEBUG) elif args.verbose: log.basicConfig(level=log.INFO) while 1: try: confDb = pickle.load(open(Glob.confDbFile, 'rb')) db = Database(log, confDb) if not db.error: choice = main() if choice == "": break elif choice == "1": conf_database() elif choice == "2": header("## Creation de la base de données ##\n") db.sql_script('script_create_DB.sql') elif choice == "3": header("## Suppression des données dans la base de données ##\n") db.sql_script('script_erase_DB.sql') elif choice == "4": header("# Insertion des données dans la base de données en cours... #\n") data_create()
def get_undark_db(self):
    """Run the undark deleted-record recovery over this app's databases.

    Returns:
        Whatever ``Database.get_undark_output`` produces for the report.
    """
    logging.info("Getting undark output...")
    undark_output = Database.get_undark_output(self.databases,
                                               self.report_path)
    return undark_output
def get_user_messages(self):
    """Collect TikTok IM conversations and their messages.

    Iterates over every ``*_im.db`` database, resolves both participants
    of each conversation to their unique ids, parses every message
    (timestamp, body, type, read/deleted status, direction) and pushes
    each message into the timeline.

    Returns:
        list[dict]: one dict per conversation, each holding participant
        info plus a ``messages`` list.
    """
    logging.info("Getting User Messages...")
    # Each entry is one conversation, including participant information
    # and its messages
    conversations_list = []
    for db in self.databases:
        if not db.endswith("_im.db"):
            continue
        database = Database(db)
        conversations_ids_list = database.execute_query(
            "select conversation_id from conversation_core"
        )  # list of conversations
        for conversation in conversations_ids_list:
            conversation_output = {}
            # conversation_id is colon-separated; fields 2 and 3 carry
            # the two participants' numeric uids
            id1 = conversation[0].split(':')[2]
            id2 = conversation[0].split(':')[3]
            conversation_output["database"] = os.path.basename(db)
            conversation_output[
                "participant_1"] = self.get_user_uniqueid_by_id(id1)
            conversation_output[
                "participant_2"] = self.get_user_uniqueid_by_id(id2)
            conversation_output["messages"] = []
            # messages from conversations
            messages_list = database.execute_query(
                "select created_time/1000 as created_time, content as message, case when read_status = 0 then 'Not read' when read_status = 1 then 'Read' else read_status end read_not_read, local_info, type, case when deleted = 0 then 'Not deleted' when deleted = 1 then 'Deleted' else deleted end, sender from msg where conversation_id='{}' order by created_time;"
                .format(conversation[0]))
            # getting messages from conversations
            for entry in messages_list:
                message = {}
                message["createdtime"] = entry[0]
                message["readstatus"] = str(entry[2])
                message["localinfo"] = entry[3]
                # Direction: the sender column holds a numeric uid, so
                # compare it against participant 1's uid
                if entry[6] == int(id1):
                    message["sender"] = conversation_output[
                        "participant_1"]
                    message["receiver"] = conversation_output[
                        "participant_2"]
                else:
                    message["sender"] = conversation_output[
                        "participant_2"]
                    message["receiver"] = conversation_output[
                        "participant_1"]
                message["type"] = self.get_message_type_by_id(entry[4])
                message["message"] = self.parse_body_message_by_id(
                    entry[4], json.loads(entry[1]))
                message["deleted"] = str(entry[5])
                conversation_output["messages"].append(message)
                timeline_event = {}
                timeline_event["from"] = message["sender"]
                timeline_event["to"] = message["receiver"]
                timeline_event["message"] = message["message"]
                self.timeline.add(message["createdtime"], "AF_message",
                                  timeline_event)
            # adding conversation and participants information to main array
            conversations_list.append(conversation_output)
    # NOTE(review): this logs only the *last* conversation's message
    # count, not the total across all conversations — confirm intent.
    logging.info("{} messages found".format(
        len(conversation_output.get("messages"))))
    return conversations_list