def test__start_logging(self):                          # todo: understand better why this test takes about 1.1 secs to execute (some of it is caused by the processing process starting, and elastic being setup)
    log_worker = start_logging()                        # trigger logging process
    log_info()                                          # send 4 log messages
    log_warning()
    log_info(message=random_text(), data={'a': 42})
    log_error(message='an error')
def get_xmlreport(self, endpoint, fileId, dir):
    log_info(message=f"getting XML Report for {fileId} at {endpoint}")
    xmlreport = self.xmlreport_request(endpoint, fileId)
    if not xmlreport:
        raise ValueError('Failed to obtain the XML report')
    try:
        json_obj = xmltodict.parse(xmlreport)
        file_extension = json_obj["gw:GWallInfo"]["gw:DocumentStatistics"]["gw:DocumentSummary"]["gw:FileType"]
        self.meta_service.set_rebuild_file_extension(dir, file_extension)
        json_obj['original_hash'] = os.path.basename(dir)
        json_save_file_pretty(json_obj, os.path.join(dir, "report.json"))
        #self.report_elastic.add_report(json_obj)
        analysis_obj = self.analysis_json.get_file_analysis(os.path.basename(dir), json_obj)
        json_save_file_pretty(analysis_obj, os.path.join(dir, "analysis.json"))
        self.analysis_elastic.add_analysis(analysis_obj)
        return True
    except Exception as error:
        log_error(message=f"Error in parsing xmlreport for {fileId} : {error}")
        return False
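# A minimal standalone sketch of the xmltodict lookup that get_xmlreport relies on: the engine's XML report
# is parsed into nested dicts and the rebuilt file's type is read from
# gw:GWallInfo -> gw:DocumentStatistics -> gw:DocumentSummary -> gw:FileType.
# The sample XML below is illustrative only; a real report contains many more elements.
import xmltodict

sample_report = """<gw:GWallInfo>
  <gw:DocumentStatistics>
    <gw:DocumentSummary>
      <gw:FileType>docx</gw:FileType>
    </gw:DocumentSummary>
  </gw:DocumentStatistics>
</gw:GWallInfo>"""

json_obj = xmltodict.parse(sample_report)
assert json_obj["gw:GWallInfo"]["gw:DocumentStatistics"]["gw:DocumentSummary"]["gw:FileType"] == "docx"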
def setUpClass(cls) -> None:
    cls.setup_testing = Setup_Testing()
    cls.log_worker    = start_logging()
    cls.storage       = Storage()
    cls.config        = cls.storage.config
    from cdr_plugin_folder_to_folder.utils.Logging import log_info
    log_info(message='in Temp_Config')
    cls.setup_testing.set_config_to_temp_folder()
def ProcessDirectoryWithEndpoint(self, itempath, file_hash, endpoint_index):
    if not os.path.isdir(itempath):
        return False
    log_info(message=f"Starting ProcessDirectoryWithEndpoint on endpoint # {endpoint_index} for file {file_hash}")
    meta_service       = Metadata_Service()
    original_file_path = meta_service.get_original_file_paths(itempath)
    events             = Events_Log(itempath)

    endpoint = "http://" + self.config.endpoints['Endpoints'][endpoint_index]['IP'] + ":" + self.config.endpoints['Endpoints'][endpoint_index]['Port']
    events.add_log("Processing with: " + endpoint)

    meta_service.set_f2f_plugin_version(itempath, API_VERSION)
    meta_service.set_f2f_plugin_git_commit(itempath, self.git_commit())

    try:
        file_processing = File_Processing(events, self.events_elastic, self.report_elastic, self.analysis_elastic, meta_service)
        if not file_processing.processDirectory(endpoint, itempath):
            events.add_log("CANNOT be processed")
            return False

        log_data = {
            'file'      : original_file_path,
            'status'    : FileStatus.COMPLETED,
            'error'     : 'none',
            'timestamp' : datetime.now(),
        }
        log_info('ProcessDirectoryWithEndpoint', data=log_data)
        meta_service.set_error(itempath, "none")
        meta_service.set_status(itempath, FileStatus.COMPLETED)
        self.hash_json.update_status(file_hash, FileStatus.COMPLETED)
        events.add_log("Has been processed")
        return True
    except Exception as error:
        log_data = {
            'file'   : original_file_path,
            'status' : FileStatus.FAILED,
            'error'  : str(error),
        }
        log_error(message='error in ProcessDirectoryWithEndpoint', data=log_data)
        meta_service.set_error(itempath, str(error))
        meta_service.set_status(itempath, FileStatus.FAILED)
        self.hash_json.update_status(file_hash, FileStatus.FAILED)
        events.add_log("ERROR:" + str(error))
        return False
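# Illustrative only: the shape of self.config.endpoints assumed by the URL construction above (the IP and Port
# values are made up). Each entry under 'Endpoints' contributes one "http://IP:Port" rebuild endpoint.
endpoints = {'Endpoints': [{'IP': '127.0.0.1', 'Port': '8080'},
                           {'IP': '127.0.0.1', 'Port': '8081'}]}

endpoint_index = 0
endpoint = "http://" + endpoints['Endpoints'][endpoint_index]['IP'] + ":" + endpoints['Endpoints'][endpoint_index]['Port']
assert endpoint == "http://127.0.0.1:8080"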
def create_metadata(self, file_path):
    self.metadata = Metadata()
    self.metadata.add_file(file_path)
    self.metadata_elastic.add_metadata(self.metadata.data)          # save metadata to elastic
    log_info(message=f"created metadata for: {self.metadata.get_file_name()}",
             data={"file_path"          : file_path,
                   "metadata_file_path" : self.metadata.metadata_file_path()})
    return self.metadata
def start_logging():
    global logging_worker
    queue   = get_logging_queue()
    enabled = get_logging_enabled()
    if logging_worker is None and enabled.value == 0:       # if logging is already enabled, don't start a new process
        worker = multiprocessing.Process(target=start_logging_process, args=(queue, enabled), daemon=True)
        worker.start()
        enabled.value  = 1                                  # set enabled value
        logging_worker = worker
        log_info(message="Logging Process started")
    return logging_worker
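# A minimal usage sketch (assumes the start_logging and log_info shown above are importable): repeated calls
# to start_logging() reuse the same daemon process, because the module-level logging_worker and the shared
# 'enabled' flag guard against spawning a second one.
worker_1 = start_logging()          # first call spawns the logging process and sets enabled.value to 1
worker_2 = start_logging()          # second call hits the guard and just returns the existing worker
assert worker_1 is worker_2
log_info(message="hello from the usage sketch", data={'a': 42})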
def add_log(self, message, data=None):
    log_info(message=message, data=data)
    if isinstance(data, str):                # wrap plain strings so the stored event data is always a dict
        data = {"str": data}
    self.get_from_file()
    json_data = {
        "timestamp" : str(datetime.now()),
        "message"   : message,
        "data"      : data or {},
        "uuid"      : self.unique_id
    }
    self.data["events"].append(json_data)
    self.write_to_file()
    return json_data
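# Illustrative only: the shape of the entry that add_log appends to self.data["events"]. The values below are
# made up; in the real method the timestamp comes from datetime.now() and the uuid from the instance's unique_id.
example_event = {
    "timestamp" : "2021-05-04 10:15:30.123456",
    "message"   : "Processing with: http://127.0.0.1:8080",
    "data"      : {},
    "uuid"      : "1b9d6bcd-bbfd-4b2d-9b5d-ab8dfbbd4bed",
}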
def LoopHashDirectories(self, thread_count=None):
    # Allow only a single loop to be run at a time
    if self.IsProcessing():
        log_error(message="ERROR: Attempt to start processing while processing is in progress")
        return False

    self.status.StartStatusThread()

    thread_count = thread_count or self.config.thread_count
    log_info(message="in LoopHashDirectories, about to start main loop")

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    loop.run_until_complete(self.LoopHashDirectoriesAsync(thread_count))

    log_info(message="in LoopHashDirectories, Loop completed")
    self.status.StopStatusThread()
    return True
def LoopHashDirectoriesInternal(self, thread_count, do_single):
    if folder_exists(self.storage.hd2_data()) is False:
        log_message = "ERROR: rootdir does not exist: " + self.storage.hd2_data()
        log_error(log_message)
        return False

    if not isinstance(thread_count, int):
        raise TypeError("thread_count must be an integer")
    if not isinstance(do_single, bool):
        raise TypeError("do_single must be a boolean")

    log_message = f"LoopHashDirectoriesInternal started with {thread_count} threads"
    self.events.add_log(log_message)
    log_info(log_message)

    json_list = self.updateHashJson()

    threads = list()

    process_index = 0

    log_info(message=f'before Mapping thread_data for {len(json_list)} files')
    thread_data = []
    for key in json_list:
        file_hash = key
        itempath  = self.storage.hd2_data(key)
        if (FileStatus.COMPLETED == json_list[key]["file_status"]):
            self.events.add_log("The file processing has already been completed")
            continue
        if not os.path.exists(itempath):
            self.events.add_log(f"ERROR: Path \"{itempath}\" does not exist")
            json_list[key]["file_status"] = FileStatus.FAILED
            continue

        process_index += 1
        thread_data.append((itempath, file_hash, process_index,))
        # # limit the number of parallel threads
        #
        # if process_index % int(thread_count) == 0:                     # todo: refactor this workflow to use multiprocess and queues
        #     # Clean up the threads
        #     for index, thread in enumerate(threads):                   # todo: since at the moment this will block allocating new threads until
        #         thread.join()                                          #       all have finished execution
        #
        # process_index += 1
        # log_info(message=f"in LoopHashDirectoriesInternal process_index={process_index} , thread #{process_index % int(thread_count) }")
        # x = threading.Thread(target=self.ProcessDirectory, args=(itempath, file_hash, process_index,))
        # threads.append(x)
        # x.start()
        #
        # if do_single:
        #     break
        #
        # if not Loops.continue_processing:
        #     break

    # for index, thread in enumerate(threads):
    #     thread.join()

    log_info(message=f'after mapped thread_data, there are {len(thread_data)} mapped items')
    #thread_data = thread_data[:500]
    #log_info(message=f'to start with only processing {len(thread_data)} thread_data items')

    pool    = ThreadPool(thread_count)
    results = pool.map(self.ProcessDirectory, thread_data)
    pool.close()
    pool.join()

    self.moveProcessedFiles()

    self.events.add_log("LoopHashDirectoriesInternal finished")
    return True
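# A minimal standalone sketch (the names below are illustrative, not from this module) of the ThreadPool
# pattern used above: pool.map passes each tuple from the work list as a single argument, so the worker
# unpacks it itself, which is why thread_data collects (itempath, file_hash, process_index) tuples.
from multiprocessing.pool import ThreadPool

def process_item(thread_data):                      # stands in for self.ProcessDirectory
    itempath, file_hash, process_index = thread_data
    return f"{process_index}: processed {file_hash} at {itempath}"

work    = [("/tmp/hd2/data/aaa", "aaa", 1), ("/tmp/hd2/data/bbb", "bbb", 2)]
pool    = ThreadPool(2)
results = pool.map(process_item, work)              # blocks until every work item has been processed
pool.close()
pool.join()
print(results)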
def do_rebuild(self, endpoint, hash, source_path, dir):
    log_info(message=f"Starting rebuild for file {hash} on endpoint {endpoint}")
    with Duration() as duration:
        event_data = {"endpoint": endpoint, "hash": hash, "source_path": source_path, "dir": dir}   # todo: see if we can use a variable that holds the params data
        self.add_event_log('Starting File rebuild', event_data)

        self.meta_service.set_rebuild_server(dir, endpoint)

        encodedFile = FileService.base64encode(source_path)
        if not encodedFile:
            message = f"Failed to encode the file: {hash}"
            log_error(message=message)
            self.add_event_log(message)
            self.meta_service.set_error(dir, message)
            return False

        response = self.rebuild(endpoint, encodedFile)
        result   = response.text
        if not result:
            message = f"Failed to rebuild the file : {hash}"
            log_error(message=message)
            self.add_event_log(message)
            self.meta_service.set_error(dir, message)
            return False

        try:
            for path in self.meta_service.get_original_file_paths(dir):
                #rebuild_file_path = path
                if path.startswith(self.config.hd1_location):
                    rebuild_file_path = path.replace(self.config.hd1_location, self.config.hd3_location)
                else:
                    rebuild_file_path = os.path.join(self.config.hd3_location, path)

                folder_create(parent_folder(rebuild_file_path))                        # make sure parent folder exists

                final_rebuild_file_path = self.save_file(result, rebuild_file_path)    # returns actual file saved (which could be .html)

                # todo: improve the performance of these updates since each one will trigger a save
                file_size    = os.path.getsize(final_rebuild_file_path)                # calculate rebuilt file size
                rebuild_hash = self.meta_service.file_hash(final_rebuild_file_path)    # calculate hash of final_rebuild_file_path
                self.meta_service.set_rebuild_file_size(dir, file_size)
                self.meta_service.set_rebuild_file_path(dir, final_rebuild_file_path)  # capture final_rebuild_file_path
                self.meta_service.set_rebuild_hash(dir, rebuild_hash)                  # capture it

            if not FileService.base64decode(result):
                message = "Engine response could not be decoded"
                log_error(message=message, data=f"{result}")
                self.meta_service.set_error(dir, message)
                return False

        except Exception as error:
            message = f"Error Saving file for {hash} : {error}"
            log_error(message=message)
            self.meta_service.set_xml_report_status(dir, "No Report")
            self.meta_service.set_error(dir, message)
            return False

        headers   = response.headers
        fileIdKey = "X-Adaptation-File-Id"

        # get XML report
        if fileIdKey in headers:
            if self.get_xmlreport(endpoint, headers[fileIdKey], dir):
                self.add_event_log('The XML report has been saved')
                self.meta_service.set_xml_report_status(dir, "Obtained")
            else:
                self.meta_service.set_xml_report_status(dir, "No XML Report")
        else:
            self.meta_service.set_xml_report_status(dir, "Failed to obtain")
            message = f'No X-Adaptation-File-Id header found in the response for {hash}'
            log_error(message)
            self.add_event_log(message)
            self.meta_service.set_error(dir, message)
            return False
            #raise ValueError("No X-Adaptation-File-Id header found in the response")

        # todo: add when server side supports this
        # SDKEngineVersionKey = "X-SDK-Engine-Version"
        # SDKAPIVersionKey    = "X-SDK-Api-Version"
        #
        # if SDKEngineVersionKey in headers:
        #     self.sdk_engine_version = headers[SDKEngineVersionKey]
        # if SDKAPIVersionKey in headers:
        #     self.sdk_api_version = headers[SDKAPIVersionKey]
        #
        # self.meta_service.set_server_version(dir, "Engine:" + self.sdk_engine_version + " API:" + self.sdk_api_version)

    log_info(message=f"rebuild ok for file {hash} on endpoint {endpoint} took {duration.seconds()} seconds")
    return True
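# FileService.base64encode / base64decode are project helpers that are not shown in this listing; the sketch
# below is an assumption about their behaviour (read a file and base64-encode its bytes; decode the engine's
# base64 response back to bytes, returning None on failure), using only the standard library.
import base64

def base64encode_sketch(file_path):                 # hypothetical stand-in for FileService.base64encode
    with open(file_path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")

def base64decode_sketch(encoded_text):              # hypothetical stand-in for FileService.base64decode
    try:
        return base64.b64decode(encoded_text)
    except Exception:
        return None                                 # mirrors the "could not be decoded" check in do_rebuild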