def run(self):
    """Register every file found under the read source folder.

    Walks ``self.read_source.folder`` recursively and, for each file, builds
    a ``File`` holding its size, name, path relative to the source folder
    and creation/modification/access times.  SHA-256 and MD5 digests are
    filled in when ``self.calculate_hash`` is truthy.  Each record is marked
    ``'Intact'`` and handed to ``self.add``; ``self.commit`` runs once after
    the walk.
    """

    def _timestamp(getter, target):
        # Best effort: an unreadable or out-of-range timestamp is stored as
        # None instead of aborting the whole scan.
        try:
            return datetime.fromtimestamp(getter(target))
        except (OSError, OverflowError, ValueError):
            return None

    base_folder = self.read_source.folder
    # A first pass only counts files so that progress can be reported.
    n_files = sum(len(files) for _, _, files in os.walk(base_folder))
    i = 0
    for root, dirs, files in os.walk(base_folder):
        # NOTE(review): os.walk yields roots prefixed with the walked folder,
        # so this only matches when the folder itself is '.report' - confirm
        # whether the intent was to skip a '.report' sub-directory.
        if root == '.report':
            continue
        for f in files:
            path = os.path.join(root, f)
            file_ = File()
            file_.size = os.path.getsize(path)
            file_.filename = f
            file_.extracted_path = os.path.relpath(path, base_folder)
            file_.creation_time = _timestamp(os.path.getctime, path)
            file_.modify_time = _timestamp(os.path.getmtime, path)
            file_.access_time = _timestamp(os.path.getatime, path)
            if self.calculate_hash:
                file_.sha256 = self.sha256(path)
                file_.md5 = self.md5(path)
            file_.deleted_state = 'Intact'
            self.add(file_)
            i += 1
            progress(i, n_files)
    self.commit()
def parse_user_acccounts(self):
    """Import every ``UserAccount`` model found in the XML tree.

    For each matching element a ``UserAccount`` is created with its
    ``deleted_state`` attribute and the text of the ``Name``, ``Username``,
    ``Password`` and ``ServiceType`` fields (``None`` when a field or its
    value element is absent).  Records are passed to ``self.add`` and
    committed once at the end.  Nothing happens when no element matches.

    Raises:
        KeyError: if an element lacks the ``deleted_state`` attribute.
    """
    user_accounts_el = self.root.findall(
        f".//{self.namespace}model[@type='UserAccount']")
    if not user_accounts_el:
        return

    def _field_text(element, name):
        # Compare against None explicitly: the truth value of an XML element
        # depends on its child count and testing it is deprecated.
        field = element.find(f"{self.namespace}field[@name='{name}']")
        value = (field.find(f"{self.namespace}value")
                 if field is not None else None)
        return value.text if value is not None else None

    n = len(user_accounts_el)
    print("\nLendo contas de usuário")
    for i, user_account_el in enumerate(user_accounts_el):
        progress(i, n)
        user_account = UserAccount()
        user_account.deleted_state = user_account_el.attrib['deleted_state']
        user_account.name = _field_text(user_account_el, 'Name')
        user_account.username = _field_text(user_account_el, 'Username')
        user_account.password = _field_text(user_account_el, 'Password')
        user_account.service_type = _field_text(user_account_el,
                                                'ServiceType')
        self.add(user_account)
    self.commit()
def run(self):
    """Read every chat file in parallel using a worker pool.

    The file names returned by ``self.getChatsFilename()`` are stored in
    ``self.lista``; one job dictionary per file is sent to ``chat_worker``
    through a pool sized by ``config_manager.data['n_workers']``.  Progress
    is reported as results arrive (in completion order).
    """
    self.lista = self.getChatsFilename()
    n = len(self.lista)
    print("Lendo chats")
    # Lazily built job descriptions, one per chat file.
    procs = ({
        'read_source_id': self.read_source.id,
        'exp': self.exp,
        'chats_path': self.chats_path,
        'att_path': self.att_path,
        'filename': f
    } for f in self.lista)
    # The context manager terminates the workers if a job raises, so a
    # failure no longer leaves the pool running.
    # NOTE(review): assumes Pool is multiprocessing's Pool (context-manager
    # capable) - confirm against the file's imports.
    with Pool(processes=config_manager.data['n_workers']) as pool:
        for i, _ in enumerate(pool.imap_unordered(chat_worker, procs)):
            progress(i, n)
        pool.close()
        pool.join()
def parse_sms(self):
    """Import the SMS models found under the ``SMS`` model type element.

    Each child element becomes an ``Sms`` with its ``deleted_state``,
    ``TimeStamp`` (parsed with ``parser.parse``), ``Folder``, ``Status`` and
    ``Body``.  Every entry of the ``Parties`` multi-model field becomes an
    ``SmsPart`` (identifier, name, role) attached to ``sms.parties``.
    Everything is passed to ``self.add`` and committed once at the end.
    Nothing happens when the container element is missing or empty.

    Raises:
        KeyError: if an element lacks the ``deleted_state`` attribute.
    """
    smss_el = self.root.find(f".//{self.namespace}modelType[@type='SMS']")
    # Explicit checks instead of the deprecated element truth test.
    if smss_el is None or len(smss_el) == 0:
        return

    def _field_text(element, name):
        field = element.find(f"{self.namespace}field[@name='{name}']")
        value = (field.find(f"{self.namespace}value")
                 if field is not None else None)
        return value.text if value is not None else None

    n = len(smss_el)
    print("\nLendo SMS")
    for i, sms_el in enumerate(smss_el):
        progress(i, n)
        sms = Sms()
        sms.deleted_state = sms_el.attrib['deleted_state']
        # An empty <value/> has no text; store None rather than handing
        # None to the date parser.
        stamp = _field_text(sms_el, 'TimeStamp')
        sms.timestamp = parser.parse(stamp) if stamp is not None else None
        sms.folder = _field_text(sms_el, 'Folder')
        sms.status = _field_text(sms_el, 'Status')
        sms.body = _field_text(sms_el, 'Body')
        parts_el = sms_el.find(
            f"{self.namespace}multiModelField[@name='Parties']")
        if parts_el is not None:
            for part_el in parts_el:
                part = SmsPart()
                part.deleted_state = part_el.attrib['deleted_state']
                part.identifier = _field_text(part_el, 'Identifier')
                part.name = _field_text(part_el, 'Name')
                part.role = _field_text(part_el, 'Role')
                self.add(part)
                sms.parties.append(part)
        self.add(sms)
    self.commit()
def parse_call(self):
    """Import the call models found under the ``Call`` model type element.

    Each child element becomes a ``Call`` with its ``deleted_state``,
    ``Type``, ``TimeStamp`` (parsed with ``parser.parse``) and ``Duration``
    (converted with ``duration_parse``).  Every entry of the ``Parties``
    multi-model field becomes a ``CallPart`` (identifier, name, role)
    attached to ``call.parties``.  Everything is passed to ``self.add`` and
    committed once at the end.  Nothing happens when the container element
    is missing or empty.

    Raises:
        KeyError: if an element lacks the ``deleted_state`` attribute.
    """
    calls_el = self.root.find(
        f".//{self.namespace}modelType[@type='Call']")
    # Explicit checks instead of the deprecated element truth test.
    if calls_el is None or len(calls_el) == 0:
        return

    def _field_text(element, name):
        field = element.find(f"{self.namespace}field[@name='{name}']")
        value = (field.find(f"{self.namespace}value")
                 if field is not None else None)
        return value.text if value is not None else None

    n = len(calls_el)
    print("\nLendo chamadas")
    for i, call_el in enumerate(calls_el):
        progress(i, n)
        call = Call()
        call.deleted_state = call_el.attrib['deleted_state']
        call.type_ = _field_text(call_el, 'Type')
        # Empty <value/> elements have no text; keep None instead of
        # feeding None to the converters.
        stamp = _field_text(call_el, 'TimeStamp')
        call.timestamp = parser.parse(stamp) if stamp is not None else None
        length = _field_text(call_el, 'Duration')
        call.duration = (duration_parse(length)
                         if length is not None else None)
        parts_el = call_el.find(
            f"{self.namespace}multiModelField[@name='Parties']")
        if parts_el is not None:
            for part_el in parts_el:
                part = CallPart()
                part.deleted_state = part_el.attrib['deleted_state']
                part.identifier = _field_text(part_el, 'Identifier')
                part.name = _field_text(part_el, 'Name')
                part.role = _field_text(part_el, 'Role')
                self.add(part)
                call.parties.append(part)
        self.add(call)
    self.commit()
def parse_files(self):
    """Import the ``file`` entries listed under ``taggedFiles``.

    Each entry becomes a ``File`` carrying the ``deleted`` attribute
    (``"Unknow"`` when absent), the ``path`` and ``size`` attributes, the
    creation/modify/access timestamps (parsed with ``parser.parse``) and the
    ``Local Path``, ``SHA256``, ``MD5``, ``Tags`` and ``ContentType`` items.
    Records are passed to ``self.add`` and committed once at the end.
    Nothing happens when there is no ``taggedFiles`` element or it lists no
    files.

    Raises:
        KeyError: if an entry lacks the ``path`` or ``size`` attribute.
    """
    tagged_files_el = self.root.find(f".//{self.namespace}taggedFiles")
    # Explicit None check instead of the deprecated element truth test.
    files_el = (tagged_files_el.findall(f"{self.namespace}file")
                if tagged_files_el is not None else None)
    if not files_el:
        return

    def _text(element, tag, name):
        found = element.find(f".//{self.namespace}{tag}[@name='{name}']")
        return found.text if found is not None else None

    def _date(element, name):
        # An element without text yields None rather than a parser error.
        raw = _text(element, 'timestamp', name)
        return parser.parse(raw) if raw is not None else None

    n = len(files_el)
    print("\nLendo arquivos")
    for i, file_el in enumerate(files_el):
        progress(i, n)
        file_ = File()
        file_.deleted_state = file_el.attrib.get('deleted', "Unknow")
        file_.original_path = file_el.attrib['path']
        file_.size = file_el.attrib['size']
        file_.creation_time = _date(file_el, 'CreationTime')
        file_.modify_time = _date(file_el, 'ModifyTime')
        file_.access_time = _date(file_el, 'AccessTime')
        file_.extracted_path = _text(file_el, 'item', 'Local Path')
        file_.sha256 = _text(file_el, 'item', 'SHA256')
        file_.md5 = _text(file_el, 'item', 'MD5')
        file_.type_ = _text(file_el, 'item', 'Tags')
        file_.content_type = _text(file_el, 'item', 'ContentType')
        self.add(file_)
    self.commit()
def run(self):
    """Hand the ids of message-linked files to ``worker`` in chunks of 100.

    Queries the ids of ``File`` rows of this read source that have a
    ``message_id``, splits them with ``chunks`` and processes the chunks in
    a pool sized by ``config_manager.data['n_workers']``, reporting progress
    per finished chunk.
    """
    print("Excluindo entradas de arquivos duplicadas no DB")
    # NOTE(review): `!= None` is kept on purpose - presumably a SQL column
    # comparison building an IS NOT NULL clause, which `is not None` would
    # not do; confirm against the ORM in use.
    attachments = db_session.query(File.id).filter(
        File.message_id != None,  # noqa: E711
        File.read_source_id == self.read_source.id).all()
    chunks_ = list(chunks(attachments, 100))
    n = len(chunks_)
    # The context manager terminates the workers if a job raises, so a
    # failure no longer leaves the pool running.
    # NOTE(review): assumes Pool is multiprocessing's Pool - confirm.
    with Pool(processes=config_manager.data['n_workers']) as pool:
        for i, _ in enumerate(pool.imap_unordered(worker, chunks_)):
            progress(i, n)
        pool.close()
        pool.join()
def render_chats(self):
    """Render every chat selected by the report bundle, in parallel.

    The chat ids are obtained by passing a ``Chat.id`` query through
    ``self.report_bundle.filter``; one job per chat is sent to
    ``chat_worker`` through a pool sized by
    ``config_manager.data['n_workers']``, with progress reported per
    finished job.
    """
    print("Renderizando chats")
    query = db_session.query(Chat.id)
    chats = self.report_bundle.filter(Chat, query).all()
    n = len(chats)
    # Each row holds the chat id as its first column.
    procs = ({'report_bundle': self.report_bundle, 'chat_id': item[0]}
             for item in chats)
    # The context manager terminates the workers if a job raises, so a
    # failure no longer leaves the pool running.
    # NOTE(review): assumes Pool is multiprocessing's Pool - confirm.
    with Pool(processes=config_manager.data['n_workers']) as pool:
        for i, _ in enumerate(pool.imap_unordered(chat_worker, procs)):
            progress(i, n)
        pool.close()
        pool.join()
def calculate_hashes(self):
    """Fill ``self.hash_dict`` with the results of hashing the attachments.

    Every regular file under ``self.attachments_folder`` (searched
    recursively) is sent to ``hash_worker`` through a pool sized by
    ``config_manager.data['n_workers']``.  For each result ``data``,
    ``data[0]`` becomes the dictionary key and ``data[1]`` the value.
    """
    self.hash_dict = {}
    print("Calculando hash dos arquivos na pasta de anexos")
    folder = Path(self.attachments_folder)
    files = [entry for entry in folder.glob('**/*') if entry.is_file()]
    n = len(files)
    # The context manager terminates the workers if a job raises, so a
    # failure no longer leaves the pool running.
    # NOTE(review): assumes Pool is multiprocessing's Pool - confirm.
    with Pool(processes=config_manager.data['n_workers']) as pool:
        for i, data in enumerate(pool.imap_unordered(hash_worker, files)):
            progress(i, n)
            self.hash_dict[data[0]] = data[1]
        pool.close()
        pool.join()
    print("Hashes calculados")
def run(self):
    """Prepare the chat/attachment folders and read every chat file.

    Creates ``self.chats_path`` and ``self.att_path`` when they do not
    exist, asks the user (through ``instruct_continue``) to place the
    message files and attachments there, then reads each file name returned
    by ``self.getChatsFilename()`` with ``self.read_chat``.
    """
    for folder in (self.chats_path, self.att_path):
        if os.path.exists(folder):
            continue
        os.mkdir(folder)

    instruct_continue(
        f"Mova os arquivos que tem o texto das mensagens para a pasta '{self.chats_path}' e os anexos para a pasta '{self.att_path}'")

    self.lista = self.getChatsFilename()
    print("Lendo chats...")
    total = len(self.lista)
    position = 0
    for item in self.lista:
        print(f"\nLendo conversa '{item}'")
        progress(position, total)
        self.read_chat(item)
        position += 1
def run(self):
    """Import chats and messages from ``ChatStorage.sqlite``.

    Loads the ``ZWAMESSAGE``, ``ZWACHATSESSION``, ``ZWAPROFILEPUSHNAME``,
    ``ZWAMEDIAITEM`` and ``ZWAGROUPMEMBER`` tables into data frames stored
    on ``self``, then walks the messages ordered by ``ZCHATSESSION``.  A
    chat is obtained through ``self.add_chat`` whenever the session changes;
    each row becomes a ``Message`` whose sender is added to the chat's
    participants.  When ``ZMEDIAITEM`` is truthy and ``self.add_file``
    returns a ``(file, title)`` pair, the file is linked to the message and
    the title replaces the message body.  A single commit ends the import.
    Returns without importing anything when there are no messages.
    """
    self.path_sqlite = Path(self.read_source.folder) / "ChatStorage.sqlite"
    self.attachments_folder = Path(self.read_source.folder) / "Media"
    self.conn = sqlite3.connect(str(self.path_sqlite))
    self.df_messages = pd.read_sql("SELECT * FROM ZWAMESSAGE", self.conn)
    self.df_chat_session = pd.read_sql("SELECT * FROM ZWACHATSESSION",
                                       self.conn)
    self.df_profile_push_name = pd.read_sql(
        "SELECT * FROM ZWAPROFILEPUSHNAME", self.conn)
    self.df_media_item = pd.read_sql("SELECT * FROM ZWAMEDIAITEM",
                                     self.conn)
    self.df_group_member = pd.read_sql("SELECT * FROM ZWAGROUPMEMBER",
                                       self.conn)

    ordered = self.df_messages.sort_values(by=['ZCHATSESSION'])
    n = ordered.shape[0]
    if n == 0:
        # Nothing to import; indexing the first row would raise IndexError.
        return

    # Start from the first row of the *sorted* frame so the initial chat is
    # the one the loop actually begins with.
    current_chat = ordered.iloc[0]['ZCHATSESSION']
    chat_id, chat_name = self.__get_chat_id_name(current_chat)
    chat = self.add_chat(chat_id, chat_name)
    for i, (_, row) in enumerate(ordered.iterrows()):
        progress(i, n)
        if current_chat != row['ZCHATSESSION']:
            # Session changed: store the finished chat and switch.
            self.add(chat)
            chat_id, chat_name = self.__get_chat_id_name(
                row['ZCHATSESSION'])
            chat = self.add_chat(chat_id, chat_name)
            current_chat = row['ZCHATSESSION']
        message = Message()
        jid, from_name = self.__get_from_name(row['ZFROMJID'],
                                              row['ZGROUPMEMBER'])
        message.from_ = self.add_participant(jid, from_name)
        if message.from_ not in chat.participants and message.from_:
            chat.participants.append(message.from_)
        message.timestamp = convert_timestamp(row['ZMESSAGEDATE'])
        message.read_source = self.read_source
        message.chat_id = chat.id
        message.body = row['ZTEXT']
        if row['ZMEDIAITEM']:
            res = self.add_file(row['ZMEDIAITEM'])
            if res:
                media_file, title = res
                media_file.message = message
                message.body = title
                self.add(media_file)
        self.add(message)
    self.commit()
def parse_chats(self):
    """Parse the chat elements of the XML tree in parallel.

    One job per child of the ``Chat`` model type element is sent to
    ``chat_worker`` through a pool sized by
    ``config_manager.data['n_workers']``; progress is reported per finished
    job.  Nothing happens when the container element is missing or empty.
    """
    chats_el = self.root.find(
        f".//{self.namespace}modelType[@type='Chat']")
    # Explicit checks instead of the deprecated element truth test.
    if chats_el is None or len(chats_el) == 0:
        return
    n = len(chats_el)
    print("Lendo chats")
    procs = ({
        'read_source_id': self.read_source.id,
        'namespace': self.namespace,
        'chat_el': chat_el
    } for chat_el in chats_el)
    # The context manager terminates the workers if a job raises, so a
    # failure no longer leaves the pool running.
    # NOTE(review): assumes Pool is multiprocessing's Pool - confirm.
    with Pool(processes=config_manager.data['n_workers']) as pool:
        for i, _ in enumerate(pool.imap_unordered(chat_worker, procs)):
            progress(i, n)
        pool.close()
        pool.join()
def parse_contact(self):
    """Import the contact models found under the ``Contact`` model type.

    Each child element becomes a ``Contact`` with its ``deleted_state``,
    ``Name`` and ``Source``.  Every entry of the ``Entries`` multi-model
    field becomes a ``ContactEntry`` (category, value) attached to
    ``contact.entries``.  Everything is passed to ``self.add`` and committed
    once at the end.  Nothing happens when the container element is missing
    or empty.

    Raises:
        KeyError: if an element lacks the ``deleted_state`` attribute.
    """
    contacts_el = self.root.find(
        f".//{self.namespace}modelType[@type='Contact']")
    # Explicit checks instead of the deprecated element truth test.
    if contacts_el is None or len(contacts_el) == 0:
        return

    def _field_text(element, name):
        field = element.find(f"{self.namespace}field[@name='{name}']")
        value = (field.find(f"{self.namespace}value")
                 if field is not None else None)
        return value.text if value is not None else None

    n = len(contacts_el)
    print("\nLendo Contatos")
    for i, contact_el in enumerate(contacts_el):
        progress(i, n)
        contact = Contact()
        contact.deleted_state = contact_el.attrib['deleted_state']
        contact.name = _field_text(contact_el, 'Name')
        contact.source = _field_text(contact_el, 'Source')
        entries_el = contact_el.find(
            f"{self.namespace}multiModelField[@name='Entries']")
        if entries_el is not None:
            for entry_el in entries_el:
                entry = ContactEntry()
                entry.deleted_state = entry_el.attrib['deleted_state']
                entry.category = _field_text(entry_el, 'Category')
                entry.value = _field_text(entry_el, 'Value')
                self.add(entry)
                contact.entries.append(entry)
        self.add(contact)
    self.commit()
def parse_files(self):
    """Parse the ``taggedFiles`` entries in parallel, 50 files per job.

    The ``file`` elements are split with ``chunks`` and each chunk is sent
    to ``files_worker`` through a pool sized by
    ``config_manager.data['n_workers']``; progress is reported per finished
    chunk.  Nothing happens when there is no ``taggedFiles`` element or it
    lists no files.
    """
    tagged_files_el = self.root.find(f".//{self.namespace}taggedFiles")
    # Explicit None check instead of the deprecated element truth test.
    files_el = (tagged_files_el.findall(f"{self.namespace}file")
                if tagged_files_el is not None else None)
    if not files_el:
        return
    chunks_ = list(chunks(files_el, 50))
    n = len(chunks_)
    print("\nLendo arquivos")
    procs = ({
        'read_source_id': self.read_source.id,
        'namespace': self.namespace,
        'chunk': chunk
    } for chunk in chunks_)
    # The context manager terminates the workers if a job raises, so a
    # failure no longer leaves the pool running.
    # NOTE(review): assumes Pool is multiprocessing's Pool - confirm.
    with Pool(processes=config_manager.data['n_workers']) as pool:
        for i, _ in enumerate(pool.imap_unordered(files_worker, procs)):
            progress(i, n)
        pool.close()
        pool.join()
def run(self):
    """Generate thumbnails for the extracted video files of this source.

    Does nothing when ``config_manager.data['thumbnails']['video']`` is
    falsy.  Otherwise a ``ThumbsGenerator`` targeting the ``sinf_thumbs``
    folder inside the read source folder is configured from the
    ``thumbnails`` settings, and the ids of ``File`` rows of type
    ``'video'`` that have an extracted path are sent to ``worker`` in
    chunks of two, through a pool sized by
    ``config_manager.data['n_workers']``.
    """
    thumbs_cfg = config_manager.data['thumbnails']
    if not thumbs_cfg['video']:
        return
    folder = Path(self.read_source.folder) / "sinf_thumbs"
    thumbs_generator = ThumbsGenerator(folder)
    thumbs_generator.set_config(
        n_rows=thumbs_cfg['n_rows'],
        n_cols=thumbs_cfg['n_cols'],
        thumb_size=thumbs_cfg['image_thumb_size'],
        extension=thumbs_cfg['extension'])
    # NOTE(review): `!= None` is kept on purpose - presumably a SQL column
    # comparison building an IS NOT NULL clause; confirm against the ORM.
    files = db_session.query(File.id).filter_by(type_='video').filter(
        File.extracted_path != None,  # noqa: E711
        File.read_source == self.read_source).all()
    print("\nGerando thumbs dos vídeos")
    chunks_ = list(chunks(files, 2))
    n = len(chunks_)
    procs = ((thumbs_generator, chunk) for chunk in chunks_)
    # The context manager terminates the workers if a job raises, so a
    # failure no longer leaves the pool running.
    # NOTE(review): assumes Pool is multiprocessing's Pool - confirm.
    with Pool(processes=config_manager.data['n_workers']) as pool:
        for i, _ in enumerate(pool.imap_unordered(worker, procs)):
            progress(i, n)
        pool.close()
        pool.join()
def parse_chats(self):
    """Store every message yielded by ``self.get_messages()``.

    For each message dictionary the chat is obtained with ``self.add_chat``
    and a ``Message`` is filled with sender, body, timestamp and chat id and
    marked ``'Intact'``.  Each attachment dictionary becomes a ``File``
    (name, mime type, size, path relative to the read source folder,
    caption as metadata) appended to the message.  A single commit ends the
    import.
    """
    total = self.messages.shape[0]
    print("Lendo mensagens do sqlite")
    position = 0
    for mess in self.get_messages():
        progress(position, total)
        position += 1

        chat = self.add_chat(mess['chat'])
        message = Message()
        sender_id = mess['from']['identifier']
        sender_name = mess['from']['name']
        message.from_ = self.add_participant(sender_id, sender_name)
        message.body = mess['body']
        message.deleted_state = 'Intact'
        message.timestamp = mess['timestamp']
        message.chat_id = chat.id

        for att in mess['attachments']:
            attachment = File()
            attachment.filename = att['name']
            attachment.content_type = att['mime_type']
            attachment.size = att['size']
            relative_path = os.path.relpath(att['extracted_path'],
                                            self.read_source.folder)
            attachment.extracted_path = relative_path
            attachment.meta_data = att['caption']
            self.add(attachment)
            message.attachments.append(attachment)

        self.add(message)
    self.commit()
def run(self):
    """Import the messages table, grouping messages into chats by thread.

    Rows are read ordered by ``thread_key`` then ``timestamp_ms``.  A chat
    is obtained through ``self.add_chat`` each time the thread key changes.
    Each row becomes a ``Message``; when the ``sender`` column holds JSON,
    its ``user_key``/``name`` identify the participant, who is also added to
    the chat.  ``timestamp_ms`` is divided by 1000 before conversion and
    stored as ``None`` when it cannot be converted.  A single commit ends
    the import.  Returns without importing anything when the table is empty.
    """
    self.__read_database()
    stm = self.table_messages.select().order_by(
        sa.asc(self.table_messages.c.thread_key),
        sa.asc(self.table_messages.c.timestamp_ms))
    res_messages = self.conn.execute(stm).fetchall()
    if not res_messages:
        # Nothing to import; indexing the first row would raise IndexError.
        return
    first = res_messages[0]
    chat = self.add_chat(first['thread_key'], first['thread_key'])
    current_thread_key = first['thread_key']
    n = len(res_messages)
    for i, m in enumerate(res_messages):
        progress(i, n)
        if current_thread_key != m['thread_key']:
            # Thread changed: store the finished chat and switch.
            db_session.add(chat)
            chat = self.add_chat(m['thread_key'], m['thread_key'])
            current_thread_key = m['thread_key']
        message = Message()
        sender = json.loads(m['sender']) if m['sender'] is not None else None
        if sender:
            message.from_ = self.add_participant(sender['user_key'],
                                                 sender['name'])
            if message.from_ not in chat.participants:
                chat.participants.append(message.from_)
        try:
            timestamp = datetime.fromtimestamp(int(m['timestamp_ms']) / 1000)
        except (TypeError, ValueError, OverflowError, OSError):
            # Missing, malformed or out-of-range timestamp.
            timestamp = None
        message.timestamp = timestamp
        message.read_source = self.read_source
        message.chat_id = chat.id
        message.body = m['text']
        db_session.add(message)
    db_session.commit()
def run(self):
    """Import messages joined with their threads, grouped into chats.

    Selects ``self.cols`` from the messages table outer-joined with the
    threads table on ``thread_key``, ordered by ``thread_key`` then
    ``timestamp``.  A chat is obtained through ``self.add_chat`` each time
    the thread key changes.  Each row becomes a ``Message`` whose sender
    (``user_id``/``sender``) is added to the chat's participants and whose
    body is the ``snippet`` column.  ``timestamp`` is divided by 1000 before
    conversion and stored as ``None`` when it cannot be converted.  A single
    commit ends the import.  Returns without importing anything when the
    query yields no rows.
    """
    self.__read_database()
    joined = sa.join(
        self.table_messages,
        self.table_threads,
        self.table_messages.c.thread_key == self.table_threads.c.thread_key,
        isouter=True)
    stm = select(self.cols).select_from(joined).order_by(
        sa.asc(self.table_messages.c.thread_key),
        sa.asc(self.table_messages.c.timestamp))
    res_messages = self.conn.execute(stm).fetchall()
    if not res_messages:
        # Nothing to import; indexing the first row would raise IndexError.
        return
    first = res_messages[0]
    chat = self.add_chat(first['thread_key'], first['thread_name'])
    current_thread_key = first['thread_key']
    n = len(res_messages)
    for i, m in enumerate(res_messages):
        progress(i, n)
        if current_thread_key != m['thread_key']:
            # Thread changed: store the finished chat and switch.
            db_session.add(chat)
            chat = self.add_chat(m['thread_key'], m['thread_name'])
            current_thread_key = m['thread_key']
        message = Message()
        message.from_ = self.add_participant(m['user_id'], m['sender'])
        if message.from_ not in chat.participants:
            chat.participants.append(message.from_)
        try:
            timestamp = datetime.fromtimestamp(int(m['timestamp']) / 1000)
        except (TypeError, ValueError, OverflowError, OSError):
            # Missing, malformed or out-of-range timestamp.
            timestamp = None
        message.timestamp = timestamp
        message.read_source = self.read_source
        message.chat_id = chat.id
        message.body = m['snippet']
        db_session.add(message)
    db_session.commit()
def parse_chats(self):
    """Import chats, participants, messages and attachments from the XML.

    Each child of the ``Chat`` model type element becomes a ``Chat`` with
    its ``deleted_state``, ``Id``, ``Name``, ``Source`` (empty string when
    the value element is absent), ``StartTime`` and ``LastActivity`` (parsed
    with ``parser.parse``).  Entries of ``Participants`` are resolved with
    ``self.add_participant`` and added to the chat once.  Entries of
    ``Messages`` become ``Message`` objects (body, timestamp, sender taken
    from the ``From`` model) whose ``Attachments`` entries become ``File``
    objects.  Everything is passed to ``self.add`` and committed once at the
    end.  Nothing happens when the container element is missing or empty.

    Raises:
        KeyError: if an element lacks the ``deleted_state`` attribute.
    """
    chats_el = self.root.find(
        f".//{self.namespace}modelType[@type='Chat']")
    # Explicit checks instead of the deprecated element truth test.
    if chats_el is None or len(chats_el) == 0:
        return

    ns = self.namespace

    def _field_text(element, name, default=None):
        # `default` is used only when the field or its value element is
        # absent; an existing value element contributes its text as is.
        field = element.find(f"{ns}field[@name='{name}']")
        value = field.find(f"{ns}value") if field is not None else None
        return value.text if value is not None else default

    def _field_date(element, name):
        # An empty <value/> has no text; keep None instead of handing None
        # to the date parser.
        raw = _field_text(element, name)
        return parser.parse(raw) if raw is not None else None

    def _parse_attachment(attachment_el):
        attachment = File()
        attachment.deleted_state = attachment_el.attrib['deleted_state']
        attachment.filename = _field_text(attachment_el, 'Filename')
        attachment.extracted_path = _field_text(
            attachment_el, 'attachment_extracted_path')
        attachment.content_type = _field_text(attachment_el, 'ContentType')
        attachment.meta_data = _field_text(attachment_el, 'MetaData')
        return attachment

    def _parse_message(message_el):
        message = Message()
        message.deleted_state = message_el.attrib['deleted_state']
        message.body = _field_text(message_el, 'Body')
        message.timestamp = _field_date(message_el, 'TimeStamp')
        # A message without a From field is stored with an anonymous
        # sender instead of raising AttributeError.
        from_field = message_el.find(f"{ns}modelField[@name='From']")
        from_el = (from_field.find(f"{ns}model")
                   if from_field is not None else None)
        from_identifier = from_name = None
        if from_el is not None:
            from_identifier = _field_text(from_el, 'Identifier')
            from_name = _field_text(from_el, 'Name')
        message.from_ = self.add_participant(from_identifier, from_name)
        attachments_el = message_el.find(
            f"{ns}multiModelField[@name='Attachments']")
        if attachments_el is not None:
            for attachment_el in attachments_el:
                attachment = _parse_attachment(attachment_el)
                self.add(attachment)
                message.attachments.append(attachment)
        return message

    n = len(chats_el)
    print("Lendo chats")
    for i, chat_el in enumerate(chats_el):
        progress(i, n)
        chat = Chat()
        chat.deleted_state = chat_el.attrib['deleted_state']
        chat.identifier = _field_text(chat_el, 'Id', "")
        chat.name = _field_text(chat_el, 'Name', "")
        chat.source = _field_text(chat_el, 'Source', "")
        chat.start_time = _field_date(chat_el, 'StartTime')
        chat.last_activity = _field_date(chat_el, 'LastActivity')

        participants_el = chat_el.find(
            f"{ns}multiModelField[@name='Participants']")
        # A chat without a Participants field simply has no participants.
        if participants_el is not None:
            for participant_el in participants_el:
                identifier = _field_text(participant_el, 'Identifier')
                name = _field_text(participant_el, 'Name')
                participant = self.add_participant(identifier, name)
                if participant not in chat.participants:
                    chat.participants.append(participant)

        messages_el = chat_el.find(f"{ns}multiModelField[@name='Messages']")
        if messages_el is not None:
            for message_el in messages_el:
                message = _parse_message(message_el)
                self.add(message)
                chat.messages.append(message)
        self.add(chat)
    self.commit()