def _redirect_messages_to_save(self, data, slip):
    """Parse a thread listing response and queue each message for download.

    Decodes the HTTP response body in *data*, walks the ``"messages"`` list
    of the JSON payload, accumulates the reported ``sizeEstimate`` into the
    running byte total, and hands each message to the content downloader
    under a sanitized file name.

    :param data: file-like HTTP response; body is JSON (assumes a Gmail
                 thread resource whose messages carry "id" and
                 "sizeEstimate" -- TODO confirm against get_thread_uri)
    :param slip: the originating DownloadSlip (unused here)
    """
    body = data.read().decode()
    json_data = json.loads(body)
    if "messages" in json_data:
        for message in json_data["messages"]:
            download_uri = self.get_message_uri(message)
            _filetitle = message["id"] + ".txt"
            filetitle = Common.safe_file_name(_filetitle)
            # Running total reported later as "Total size of mail ...".
            self.file_size_bytes += int(message["sizeEstimate"])
            # BUG FIX: the original compared filetitle to itself, so the
            # normalization warning could never fire. Compare the raw name
            # against the sanitized one instead.
            if filetitle != _filetitle:
                self.project.log(
                    "exception",
                    "Normalized '{}' to '{}'".format(_filetitle, filetitle),
                    "warning", True)
            self.content_downloader.put(
                Downloader.DownloadSlip(download_uri, message, filetitle,
                                        "snippet"))
def sync(self):
    """Run a mail acquisition pass over every known thread.

    Always downloads per-thread metadata via ``meta_downloader``; in
    "full" mode additionally queues each thread's message list
    (``self.d`` -> _redirect_messages_to_save) and the raw mail content
    (``content_downloader`` -> _save_raw_mail). Logs timing and, in full
    mode, the total estimated mail size.
    """
    d1 = datetime.now()
    # Placeholders: these are only replaced by live Downloader instances
    # in full mode; metadata-only runs never start them.
    self.d = Downloader.Downloader
    self.content_downloader = Downloader.Downloader
    # Metadata is fetched in both modes, so this one is always constructed.
    self.meta_downloader = Downloader.Downloader(
        self.project, self.oauth_provider.http_intercept,
        self._save_metadata, self.oauth_provider.get_auth_header,
        self.project.threads)
    if self.project.args.mode == "full":
        self.project.log("transaction", "Full acquisition initiated",
                         "info", True)
        self.d = Downloader.Downloader(self.project,
                                       self.oauth_provider.http_intercept,
                                       self._redirect_messages_to_save,
                                       self.oauth_provider.get_auth_header,
                                       self.project.threads)
        self.content_downloader = Downloader.Downloader(
            self.project, self.oauth_provider.http_intercept,
            self._save_raw_mail, self.oauth_provider.get_auth_header,
            self.project.threads)
    else:
        self.project.log("transaction", "Metadata acquisition initiated",
                         "info", True)
    self.initialize_items()
    cnt = len(self.threads)
    self.project.log(
        "transaction",
        "Total threads queued for acquisition: {}".format(cnt), "info",
        True)
    self.metadata()
    # Queue every thread; savepath is left empty because the save handlers
    # derive file names from the slip item themselves.
    for thread in self.threads:
        self.project.log("transaction",
                         'Calculating "{}"'.format(thread['snippet']),
                         "info", True)
        savepath = ""
        if self.project.args.mode == "full":
            download_uri = self.get_thread_uri(thread, "minimal")
            self.d.put(
                Downloader.DownloadSlip(download_uri, thread, savepath,
                                        'id'))
        meta_uri = self.get_thread_uri(thread, "metadata")
        self.meta_downloader.put(
            Downloader.DownloadSlip(meta_uri, thread, savepath, 'id'))
    if self.project.args.mode == "full":
        # The "minimal" pass must finish first: it both accumulates
        # file_size_bytes and feeds the content downloader's queue.
        self.d.start()
        self.d.wait_for_complete()
        self.project.log(
            "transaction",
            "Total size of mail to be acquired is {}".format(
                Common.sizeof_fmt(self.file_size_bytes, "B")), "highlight",
            True)
        self.mbox_dir = os.path.join(self.project.acquisition_dir, "mbox")
        os.makedirs(self.mbox_dir, exist_ok=True)
    if self.project.args.prompt:
        IO.get("Press ENTER to begin acquisition...")
    if self.project.args.mode == "full":
        self.content_downloader.start()
        self.content_downloader.wait_for_complete()
    # Metadata download runs last, in every mode.
    self.meta_downloader.start()
    self.meta_downloader.wait_for_complete()
    d2 = datetime.now()
    delt = d2 - d1
    self.project.log("transaction",
                     "Acquisition completed in {}".format(str(delt)),
                     "highlight", True)
def sync(self):
    """Acquire Drive content and/or metadata for every known file.

    In "full" mode each file is queued for download -- skipping files whose
    local MD5 already matches the remote ``md5Checksum`` -- and recorded in
    ``self.verification`` for the later ``verify()`` pass. In both modes the
    per-file JSON metadata is written synchronously. Trashed files are
    routed to dedicated trash / trash_metadata folders.
    """
    d1 = datetime.now()
    # Placeholder so `d` is always bound; it only becomes a live
    # downloader instance in full mode.
    d = Downloader.Downloader
    if self.project.args.mode == "full":
        self.project.log("transaction", "Full acquisition initiated",
                         "info", True)
        d = Downloader.Downloader(self.project,
                                  self.oauth_provider.http_intercept,
                                  self._save_file,
                                  self.oauth_provider.get_auth_header,
                                  self.project.threads)
    else:
        self.project.log("transaction", "Metadata acquisition initiated",
                         "info", True)
    self.initialize_items()
    cnt = len(self.files)
    self.project.log("transaction",
                     "Total items queued for acquisition: " + str(cnt),
                     "info", True)
    self.metadata()
    trash_folder = os.path.join(self.project.acquisition_dir, "trash")
    trash_metadata_folder = os.path.join(self.project.acquisition_dir,
                                         "trash_metadata")
    for file in self.files:
        self.project.log("transaction", "Calculating " + file['title'],
                         "info", True)
        download_uri = self._get_download_url(file)
        parentmap = self._get_parent_mapping(file, self.files)
        filetitle = self._get_file_name(file)
        if filetitle != file['title']:
            self.project.log(
                "exception",
                "Normalized '" + file['title'] + "' to '" + filetitle + "'",
                "warning", True)
        # Trashed items are kept apart from the regular data/metadata trees.
        if file['labels']['trashed']:
            save_download_path = os.path.join(trash_folder, parentmap)
            save_metadata_path = os.path.join(trash_metadata_folder,
                                              parentmap)
            save_download_path = os.path.normpath(
                os.path.join(save_download_path, filetitle))
            save_metadata_path = os.path.normpath(
                os.path.join(save_metadata_path, filetitle + '.json'))
        else:
            save_download_path = os.path.normpath(
                os.path.join(
                    os.path.join(self.project.project_folders["data"],
                                 parentmap), filetitle))
            save_metadata_path = os.path.normpath(
                os.path.join(
                    os.path.join(self.project.project_folders["metadata"],
                                 parentmap), filetitle + ".json"))
        save_download_path = Common.assert_path(save_download_path,
                                                self.project)
        save_metadata_path = Common.assert_path(save_metadata_path,
                                                self.project)
        if self.project.args.mode == "full":
            if save_download_path:
                v = {
                    "remote_file": os.path.join(parentmap, file['title']),
                    "local_file": save_download_path
                }
                download_file = True
                if 'md5Checksum' in file:
                    v['remote_hash'] = file['md5Checksum']
                # Skip the download when an identical local copy already
                # exists (hash match against the remote md5Checksum).
                if os.path.isfile(save_download_path):
                    if 'md5Checksum' in file:
                        # FIX: close the file handle after hashing
                        # (original leaked it via a bare open()).
                        with open(save_download_path, 'rb') as local_fh:
                            file_hash = Common.hashfile(
                                local_fh, hashlib.md5())
                        if file_hash == file['md5Checksum']:
                            download_file = False
                            self.project.log(
                                "exception",
                                "Local and remote hash matches for " +
                                file['title'] + " ... Skipping download",
                                "warning", True)
                        else:
                            self.project.log(
                                "exception",
                                "Local and remote hash differs for " +
                                file['title'] + " ... Queuing for download",
                                "critical", True)
                    else:
                        self.project.log(
                            "exception", "No hash information for file ' " +
                            file['title'] + "'", "warning", True)
                if download_file and download_uri:
                    self.project.log(
                        "transaction",
                        "Queueing " + file['title'] + " for download...",
                        "info", True)
                    d.put(
                        Downloader.DownloadSlip(download_uri, file,
                                                save_download_path, 'title'))
                    if 'fileSize' in file:
                        self.file_size_bytes += int(file['fileSize'])
                # If it's a file we can add it to verification file
                if download_uri:
                    self.verification.append(v)
        # Metadata is written synchronously, in both modes.
        if save_metadata_path:
            self._save_file(
                json.dumps(file, sort_keys=True, indent=4),
                Downloader.DownloadSlip(download_uri, file,
                                        save_metadata_path, 'title'), False)
    self.project.log(
        "transaction",
        "Total size of files to be acquired is {}".format(
            Common.sizeof_fmt(self.file_size_bytes, "B")), "highlight",
        True)
    if self.project.args.prompt:
        IO.get("Press ENTER to begin acquisition...")
    # BUG FIX: only start the content downloader in full mode. In metadata
    # mode `d` is still the Downloader class (never instantiated), so the
    # original unconditional d.start() raised a TypeError. Mirrors the
    # full-mode guard used by the mail sync variant.
    if self.project.args.mode == "full":
        d.start()
        d.wait_for_complete()
    d2 = datetime.now()
    delt = d2 - d1
    self.verify()
    self.project.log("transaction",
                     "Acquisition completed in {}".format(str(delt)),
                     "highlight", True)
def sync(self):
    """Acquire file metadata (and, in "full" mode, file content).

    Walks ``self.files``, skipping directories, and queues each entry's
    metadata -- plus its content in full mode -- on a single shared
    downloader. Logs the total byte count and wall-clock duration.
    """
    d1 = datetime.now()
    # One downloader handles both metadata and content slips; _save_file
    # is the completion callback for every item.
    d = Downloader.Downloader(self.project,
                              self.oauth_provider.http_intercept,
                              self._save_file,
                              self.oauth_provider.get_auth_header,
                              self.project.threads)
    if self.project.args.mode == "full":
        self.project.log("transaction", "Full acquisition initiated",
                         "info", True)
    else:
        self.project.log("transaction", "Metadata acquisition initiated",
                         "info", True)
    self.initialize_items()
    cnt = len(self.files)
    self.project.log("transaction",
                     "Total items queued for acquisition: " + str(cnt),
                     "info", True)
    self.metadata()
    for file in self.files:
        self.project.log("transaction", "Calculating " + file['path'],
                         "info", True)
        # Directories have no content or per-file metadata to fetch.
        if file['is_dir'] == False:
            # Lazy URI: the default-arg lambda binds *this* file and
            # defers the (presumably rate-limited) URL resolution until
            # the downloader actually consumes the slip -- TODO confirm
            # against _get_download_uri.
            download_uri = lambda f=file: self._get_download_uri(f)
            metadata_download_uri = self.oauth_provider.config[
                'API_ENDPOINT'] + '/metadata/auto' + file['path']
            parentmap = self._get_parent_mapping(file)
            filetitle = self._get_file_name(file)
            orig = os.path.basename(file['path'])
            if filetitle != orig:
                self.project.log(
                    "exception",
                    "Normalized '{}' to '{}'".format(orig, filetitle),
                    "warning", True)
            # Running total reported below as "Total size of files ...".
            if 'bytes' in file:
                self.file_size_bytes += int(file['bytes'])
            save_metadata_path = Common.assert_path(
                os.path.normpath(
                    os.path.join(
                        os.path.join(
                            self.project.project_folders['metadata'],
                            parentmap), filetitle + ".json")), self.project)
            # Metadata slip is queued in every mode.
            if save_metadata_path:
                self.project.log(
                    "transaction",
                    "Queueing {} for download...".format(orig), "info",
                    True)
                d.put(
                    Downloader.DownloadSlip(metadata_download_uri, file,
                                            save_metadata_path, 'path'))
            # Content slip only in full mode.
            if self.project.args.mode == "full":
                save_download_path = Common.assert_path(
                    os.path.normpath(
                        os.path.join(
                            os.path.join(
                                self.project.project_folders['data'],
                                parentmap), filetitle)), self.project)
                if save_download_path:
                    self.project.log(
                        "transaction",
                        "Queueing {} for download...".format(orig), "info",
                        True)
                    d.put(
                        Downloader.DownloadSlip(download_uri, file,
                                                save_download_path, 'path'))
    self.project.log(
        "transaction",
        "Total size of files to be acquired is {}".format(
            Common.sizeof_fmt(self.file_size_bytes, "B")), "highlight",
        True)
    if self.project.args.prompt:
        IO.get("Press ENTER to begin acquisition...")
    d.start()
    d.wait_for_complete()
    d2 = datetime.now()
    delt = d2 - d1
    self.project.log("transaction",
                     "Acquisition completed in {}".format(str(delt)),
                     "highlight", True)