# Shared imports for the classes in this section (each class lives in its own
# module in the original plugin; project-internal imports -- Config, Status,
# Storage, Hash_Json, Events_Log, Metadata_Service, FileService, the folder
# helpers, etc. -- come from the plugin's own packages and are omitted here):
import asyncio
import inspect
import json
import ntpath
import os
import pathlib
import shutil
import subprocess
from datetime import datetime
from multiprocessing.pool import ThreadPool

import requests
import xmltodict


class Loops(object):
    continue_processing = False
    processing_started  = False
    lock                = asyncio.Lock()

    def __init__(self):
        self.use_es           = False
        self.config           = Config()
        self.status           = Status()
        self.storage          = Storage()
        self.hash_json        = Hash_Json()
        self.events           = Events_Log(self.config.hd2_status_location)
        self.events_elastic   = Events_Log_Elastic()
        self.hash             = None
        self.report_elastic   = Report_Elastic()
        self.analysis_elastic = Analysis_Elastic()
        self.report_elastic.setup()
        self.analysis_elastic.setup()
        create_folder(self.storage.hd2_processed())
        create_folder(self.storage.hd2_not_processed())

    def IsProcessing(self):
        return Loops.processing_started

    def StopProcessing(self):
        Loops.continue_processing = False

    def HasBeenStopped(self):
        return not Loops.continue_processing

    def git_commit(self):
        git_commit = 'Not available'
        try:
            git_commit = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode("utf-8").rstrip()
        except Exception:
            pass
        return git_commit

    def ProcessDirectoryWithEndpoint(self, itempath, file_hash, endpoint_index):
        if not os.path.isdir(itempath):
            return False
        log_info(message=f"Starting ProcessDirectoryWithEndpoint on endpoint #{endpoint_index} for file {file_hash}")
        meta_service       = Metadata_Service()
        original_file_path = meta_service.get_original_file_paths(itempath)
        events             = Events_Log(itempath)

        endpoint = "http://" + self.config.endpoints['Endpoints'][endpoint_index]['IP'] + \
                   ":"       + self.config.endpoints['Endpoints'][endpoint_index]['Port']
        events.add_log("Processing with: " + endpoint)
        meta_service.set_f2f_plugin_version(itempath, API_VERSION)
        meta_service.set_f2f_plugin_git_commit(itempath, self.git_commit())
        try:
            file_processing = File_Processing(events, self.events_elastic, self.report_elastic,
                                              self.analysis_elastic, meta_service)
            if not file_processing.processDirectory(endpoint, itempath):
                events.add_log("CANNOT be processed")
                return False
            log_data = {
                'file'     : original_file_path,
                'status'   : FileStatus.COMPLETED,
                'error'    : 'none',
                'timestamp': datetime.now(),
            }
            log_info('ProcessDirectoryWithEndpoint', data=log_data)
            meta_service.set_error(itempath, "none")
            meta_service.set_status(itempath, FileStatus.COMPLETED)
            self.hash_json.update_status(file_hash, FileStatus.COMPLETED)
            events.add_log("Has been processed")
            return True
        except Exception as error:
            log_data = {
                'file'  : original_file_path,
                'status': FileStatus.FAILED,
                'error' : str(error),
            }
            log_error(message='error in ProcessDirectoryWithEndpoint', data=log_data)
            meta_service.set_error(itempath, str(error))
            meta_service.set_status(itempath, FileStatus.FAILED)
            self.hash_json.update_status(file_hash, FileStatus.FAILED)
            events.add_log("ERROR:" + str(error))
            return False

    def ProcessDirectory(self, thread_data):
        (itempath, file_hash, process_index) = thread_data
        endpoint_index = process_index % self.config.endpoints_count
        if not Loops.continue_processing:
            return False
        tik = datetime.now()
        process_result = self.ProcessDirectoryWithEndpoint(itempath, file_hash, endpoint_index)
        if process_result:
            self.status.add_completed()
            tok   = datetime.now()
            delta = tok - tik
            meta_service = Metadata_Service()
            meta_service.set_hd2_to_hd3_copy_time(itempath, delta.total_seconds())
        else:
            self.status.add_failed()
        return process_result
        # note: retries were removed from this method (it should not be handled like this)
        # for idx in range(self.config.endpoints_count):
        #     if self.ProcessDirectoryWithEndpoint(itempath, file_hash, endpoint_index):
        #         return
        #     # The endpoint failed to process the file
        #     # Retry it with the next one
        #     endpoint_index = (endpoint_index + 1) % self.config.endpoints_count

    def updateHashJson(self):
        self.hash_json.reset()
        meta_service = Metadata_Service()
        for hash_folder in os.listdir(self.storage.hd2_data()):
            metadata_folder = self.storage.hd2_data(hash_folder)
            if not os.path.isdir(metadata_folder):
                continue
            metadata      = meta_service.get_from_file(metadata_folder)
            file_name     = metadata.get_file_name()
            original_hash = metadata.get_original_hash()
            status        = metadata.get_rebuild_status()
            if status != FileStatus.COMPLETED:
                self.hash_json.add_file(original_hash, file_name)
        self.hash_json.save()
        self.status.set_processing_counters(len(self.hash_json.data()))
        return self.hash_json.data()

    def moveProcessedFiles(self):
        json_list = self.hash_json.data()
        for key in json_list:
            source_path = self.storage.hd2_data(key)
            if FileStatus.COMPLETED == json_list[key]["file_status"]:
                destination_path = self.storage.hd2_processed(key)
                if folder_exists(destination_path):
                    folder_delete_all(destination_path)
                shutil.move(source_path, destination_path)
            if FileStatus.FAILED == json_list[key]["file_status"]:
                meta_service = Metadata_Service()
                meta_service.get_from_file(source_path)
                metadata = meta_service.metadata
                if ("Engine response could not be decoded" == metadata.get_error()) and \
                        metadata.get_original_file_extension() in ['.xml', '.json']:
                    destination_path = self.storage.hd2_not_processed(key)
                    if folder_exists(destination_path):
                        folder_delete_all(destination_path)
                    shutil.move(source_path, destination_path)

    def LoopHashDirectoriesInternal(self, thread_count, do_single):
        if folder_exists(self.storage.hd2_data()) is False:
            log_message = "ERROR: rootdir does not exist: " + self.storage.hd2_data()
            log_error(log_message)
            return False
        if not isinstance(thread_count, int):
            raise TypeError("thread_count must be an integer")
        if not isinstance(do_single, bool):
            raise TypeError("do_single must be a boolean")

        log_message = f"LoopHashDirectoriesInternal started with {thread_count} threads"
        self.events.add_log(log_message)
        log_info(log_message)

        json_list     = self.updateHashJson()
        threads       = list()
        process_index = 0

        log_info(message=f'before mapping thread_data for {len(json_list)} files')
        thread_data = []
        for key in json_list:
            file_hash = key
            itempath  = self.storage.hd2_data(key)
            if FileStatus.COMPLETED == json_list[key]["file_status"]:
                self.events.add_log("The file processing has already been completed")
                continue
            if not os.path.exists(itempath):
                self.events.add_log(f"ERROR: Path \"{itempath}\" does not exist")
                json_list[key]["file_status"] = FileStatus.FAILED
                continue
            process_index += 1
            thread_data.append((itempath, file_hash, process_index,))

            # # limit the number of parallel threads
            #
            # if process_index % int(thread_count) == 0:       # todo: refactor this workflow to use multiprocess and queues
            #     # Clean up the threads
            #     for index, thread in enumerate(threads):     # todo: since at the moment this will block allocating new threads
            #         thread.join()                            #       until all have finished executing
            #
            # process_index += 1
            # log_info(message=f"in LoopHashDirectoriesInternal process_index={process_index} , thread #{process_index % int(thread_count)}")
            # x = threading.Thread(target=self.ProcessDirectory, args=(itempath, file_hash, process_index,))
            # threads.append(x)
            # x.start()
            #
            # if do_single:
            #     break
            #
            # if not Loops.continue_processing:
            #     break
        #
        # for index, thread in enumerate(threads):
        #     thread.join()

        log_info(message=f'after mapping thread_data, there are {len(thread_data)} mapped items')
        #thread_data = thread_data[:500]
        #log_info(message=f'to start with only processing {len(thread_data)} thread_data items')
        pool    = ThreadPool(thread_count)
        results = pool.map(self.ProcessDirectory, thread_data)
        pool.close()
        pool.join()

        self.moveProcessedFiles()

        self.events.add_log("LoopHashDirectoriesInternal finished")
        return True

    async def LoopHashDirectoriesAsync(self, thread_count, do_single=False):
        await Loops.lock.acquire()
        try:
            Loops.continue_processing = True
            Loops.processing_started  = True
            self.status.set_started()
            self.LoopHashDirectoriesInternal(thread_count, do_single)
        finally:
            Loops.processing_started = False
            Loops.lock.release()
            self.status.set_stopped()
            self.hash_json.save()

    @log_duration
    def LoopHashDirectories(self, thread_count=None):
        # Allow only a single loop to be run at a time
        if self.IsProcessing():
            log_error(message="ERROR: Attempt to start processing while processing is in progress")
            return False
        self.status.StartStatusThread()
        thread_count = thread_count or self.config.thread_count
        log_info(message="in LoopHashDirectories, about to start main loop")
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(self.LoopHashDirectoriesAsync(thread_count))
        log_info(message="in LoopHashDirectories, Loop completed")
        self.status.StopStatusThread()
        return True

    @log_duration
    def LoopHashDirectoriesSequential(self):
        # Allow only a single loop to be run at a time
        if self.IsProcessing():
            log_error("ERROR: Attempt to start processing while processing is in progress")
            return False
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(self.LoopHashDirectoriesAsync(1))
        return True

    @log_duration
    def ProcessSingleFile(self):
        if self.IsProcessing():
            log_error("ERROR: Attempt to start processing while processing is in progress")
            return False
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(self.LoopHashDirectoriesAsync(1, True))
        return True
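# Usage sketch (not part of the plugin): how a caller might drive a processing
# run with Loops. The thread_count value is an assumption; the method names
# mirror the class above.
if __name__ == "__main__":
    loops = Loops()
    loops.LoopHashDirectories(thread_count=4)   # process every pending hash folder under hd2/data
    loops.StopProcessing()                      # cooperative stop flag, checked by ProcessDirectory
    print(loops.HasBeenStopped())               # -> True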
class File_Processing:

    def __init__(self, events_log, events_elastic, report_elastic, analysis_elastic, meta_service):
        self.meta_service       = meta_service
        self.events_log         = events_log
        self.events_elastic     = events_elastic
        self.storage            = Storage()
        self.config             = Config()
        self.status             = Status()
        self.hash_json          = Hash_Json()
        self.report_elastic     = report_elastic
        self.sdk_api_version    = "Not available"
        self.sdk_engine_version = "Not available"
        self.analysis_json      = Analysis_Json()
        self.analysis_elastic   = analysis_elastic

    def add_event_log(self, message, event_data={}):
        json_data = self.events_log.add_log(message, event_data)
        self.events_elastic.add_event_log(json_data)

    def base64request(self, endpoint, api_route, base64enc_file):
        try:
            url     = endpoint + "/" + api_route
            payload = json.dumps({"Base64": base64enc_file})
            headers = {'Content-Type': 'application/json'}
            return requests.request("POST", url, headers=headers, data=payload,
                                    timeout=int(self.config.request_timeout))
        except Exception as e:
            log_error(str(e))
            raise ValueError(str(e))

    def xmlreport_request(self, endpoint, fileID):
        try:
            url     = endpoint + "/api/Analyse/xmlreport?fileId=" + fileID
            payload = ""
            headers = {'Content-Type': 'application/octet-stream'}
            response = requests.request("GET", url, headers=headers, data=payload,
                                        timeout=int(self.config.request_timeout))
            return response.text
        except Exception as e:
            raise ValueError(str(e))

    def rebuild(self, endpoint, base64enc_file):
        return self.base64request(endpoint, "api/rebuild/base64", base64enc_file)

    def get_xmlreport(self, endpoint, fileId, dir):
        log_info(message=f"getting XML Report for {fileId} at {endpoint}")
        xmlreport = self.xmlreport_request(endpoint, fileId)
        if not xmlreport:
            raise ValueError('Failed to obtain the XML report')
        try:
            json_obj = xmltodict.parse(xmlreport)
            file_extension = json_obj["gw:GWallInfo"]["gw:DocumentStatistics"]["gw:DocumentSummary"]["gw:FileType"]
            self.meta_service.set_rebuild_file_extension(dir, file_extension)
            json_obj['original_hash'] = os.path.basename(dir)
            json_save_file_pretty(json_obj, os.path.join(dir, "report.json"))
            #self.report_elastic.add_report(json_obj)
            analysis_obj = self.analysis_json.get_file_analysis(os.path.basename(dir), json_obj)
            json_save_file_pretty(analysis_obj, os.path.join(dir, "analysis.json"))
            self.analysis_elastic.add_analysis(analysis_obj)
            return True
        except Exception as error:
            log_error(message=f"Error in parsing xmlreport for {fileId} : {error}")
            return False

    # Save to HD3
    def save_file(self, result, processed_path):
        self.add_event_log('Saving to: ' + processed_path)
        dirname  = ntpath.dirname(processed_path)
        basename = ntpath.basename(processed_path)
        folder_create(dirname)
        decoded = FileService.base64decode(result)
        if decoded:
            FileService.wrtie_binary_file(dirname, basename, decoded)     # note: 'wrtie' is the spelling used by FileService
            self.add_event_log('The decoded file has been saved')
            return processed_path
        else:
            FileService.wrtie_file(dirname, basename + ".html", result)  # todo: capture this workflow better
            self.add_event_log('Decoding FAILED. The HTML file has been saved')
            return processed_path + '.html'                               # todo: refactor this workflow and how this is calculated

    @log_duration
    def do_rebuild(self, endpoint, hash, source_path, dir):
        log_info(message=f"Starting rebuild for file {hash} on endpoint {endpoint}")
        with Duration() as duration:
            event_data = {"endpoint": endpoint, "hash": hash, "source_path": source_path, "dir": dir}
            self.add_event_log('Starting File rebuild', event_data)       # todo: see if we can use a variable that holds the params data
            self.meta_service.set_rebuild_server(dir, endpoint)

            encodedFile = FileService.base64encode(source_path)
            if not encodedFile:
                message = f"Failed to encode the file: {hash}"
                log_error(message=message)
                self.add_event_log(message)
                self.meta_service.set_error(dir, message)
                return False

            response = self.rebuild(endpoint, encodedFile)
            result   = response.text
            if not result:
                message = f"Failed to rebuild the file: {hash}"
                log_error(message=message)
                self.add_event_log(message)
                self.meta_service.set_error(dir, message)
                return False

            try:
                for path in self.meta_service.get_original_file_paths(dir):
                    #rebuild_file_path = path
                    if path.startswith(self.config.hd1_location):
                        rebuild_file_path = path.replace(self.config.hd1_location, self.config.hd3_location)
                    else:
                        rebuild_file_path = os.path.join(self.config.hd3_location, path)

                    folder_create(parent_folder(rebuild_file_path))                      # make sure the parent folder exists
                    final_rebuild_file_path = self.save_file(result, rebuild_file_path)  # returns the file actually saved (which could be .html)

                    # todo: improve the performance of these updates since each one triggers a save
                    file_size    = os.path.getsize(final_rebuild_file_path)              # calculate rebuilt file size
                    rebuild_hash = self.meta_service.file_hash(final_rebuild_file_path)  # calculate hash of final_rebuild_file_path
                    self.meta_service.set_rebuild_file_size(dir, file_size)
                    self.meta_service.set_rebuild_file_path(dir, final_rebuild_file_path)  # capture final_rebuild_file_path
                    self.meta_service.set_rebuild_hash(dir, rebuild_hash)                  # capture it

                if not FileService.base64decode(result):
                    message = "Engine response could not be decoded"
                    log_error(message=message, data=f"{result}")
                    self.meta_service.set_error(dir, message)
                    return False
            except Exception as error:
                message = f"Error saving file for {hash} : {error}"
                log_error(message=message)
                self.meta_service.set_xml_report_status(dir, "No Report")
                self.meta_service.set_error(dir, message)
                return False

            headers   = response.headers
            fileIdKey = "X-Adaptation-File-Id"
            # get XML report
            if fileIdKey in headers:
                if self.get_xmlreport(endpoint, headers[fileIdKey], dir):
                    self.add_event_log('The XML report has been saved')
                    self.meta_service.set_xml_report_status(dir, "Obtained")
                else:
                    self.meta_service.set_xml_report_status(dir, "No XML Report")
            else:
                self.meta_service.set_xml_report_status(dir, "Failed to obtain")
                message = f'No X-Adaptation-File-Id header found in the response for {hash}'
                log_error(message)
                self.add_event_log(message)
                self.meta_service.set_error(dir, message)
                return False
                #raise ValueError("No X-Adaptation-File-Id header found in the response")

            # todo: add when server side supports this
            # SDKEngineVersionKey = "X-SDK-Engine-Version"
            # SDKAPIVersionKey    = "X-SDK-Api-Version"
            #
            # if SDKEngineVersionKey in headers:
            #     self.sdk_engine_version = headers[SDKEngineVersionKey]
            # if SDKAPIVersionKey in headers:
            #     self.sdk_api_version = headers[SDKAPIVersionKey]
            #
            # self.meta_service.set_server_version(dir, "Engine:" + self.sdk_engine_version + " API:" + self.sdk_api_version)

        log_info(message=f"rebuild ok for file {hash} on endpoint {endpoint} took {duration.seconds()} seconds")
        return True

    @log_duration
    def processDirectory(self, endpoint, dir):
        self.add_event_log("Processing Directory: " + dir)
        hash = ntpath.basename(dir)
        if len(hash) != 64:
            self.add_event_log("Unexpected hash length")
            #raise ValueError("Unexpected hash length")
            return False

        metadata_file_path = os.path.join(dir, Metadata_Service.METADATA_FILE_NAME)
        if not FileService.file_exist(metadata_file_path):
            self.add_event_log("The metadata.json file does not exist")
            #raise ValueError("The metadata.json file does not exist")
            return False

        if self.meta_service.is_completed_status(dir):
            self.add_event_log("Metadata is in the COMPLETED state")
            return False

        self.add_event_log("Set metadata status IN_PROGRESS")
        self.meta_service.set_status_inprogress(dir)
        self.status.add_in_progress()

        source_path = os.path.join(dir, "source")
        if not FileService.file_exist(source_path):
            self.add_event_log("File does not exist")
            #raise ValueError("File does not exist")
            return False

        self.add_event_log("Sending to rebuild")
        tik = datetime.now()
        status = self.do_rebuild(endpoint, hash, source_path, dir)
        # if status:
        #     self.meta_service.set_status(dir, FileStatus.COMPLETED)
        #     self.meta_service.set_error(dir, "none")
        # else:
        if not status:
            self.meta_service.set_status(dir, FileStatus.FAILED)
            self.hash_json.update_status(hash, FileStatus.FAILED)
        tok   = datetime.now()
        delta = tok - tik
        self.meta_service.set_rebuild_file_duration(dir, delta.total_seconds())
        return status
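# Usage sketch (assumption): File_Processing is normally wired up by
# Loops.ProcessDirectoryWithEndpoint; constructing it by hand looks roughly
# like this. The endpoint URL and hash-folder path are placeholders.
if __name__ == "__main__":
    hash_folder = "/path/to/hd2/data/<sha256-hash>"             # hypothetical path
    events      = Events_Log(hash_folder)
    processor   = File_Processing(events, Events_Log_Elastic(), Report_Elastic(),
                                  Analysis_Elastic(), Metadata_Service())
    print(processor.processDirectory("http://127.0.0.1:8080", hash_folder))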
class test_Status(Temp_Config):

    def setUp(self) -> None:
        self.status  = Status()
        self.storage = self.status.storage

    def test__FileStatus(self):
        assert inspect.getmembers(FileStatus, lambda a: type(a) is str) == [
            ('COMPLETED'  , 'Completed Successfully'),
            ('FAILED'     , 'Completed with errors'),
            ('INITIAL'    , 'Initial'),
            ('IN_PROGRESS', 'In Progress'),
            ('NONE'       , 'None'),
            ('NOT_COPIED' , 'Will not be copied'),
            ('TO_PROCESS' , 'To Process'),
            ('__module__' , 'cdr_plugin_folder_to_folder.pre_processing.Status')]

    def test_server_status(self):
        status = self.status
        status.get_server_status()
        data = status.data()
        assert data[Status.VAR_NUMBER_OF_CPUS] > 0

        cpu_percents = data[Status.VAR_CPU_UTILIZATION]
        assert len(cpu_percents) > 0
        assert isinstance(cpu_percents[0], (int, float))
        assert cpu_percents[0] >= 0

        ram_percent = data[Status.VAR_RAM_UTILIZATION]
        assert isinstance(ram_percent, (int, float))
        assert ram_percent > 0

        processes_count = data[Status.VAR_NUM_OF_PROCESSES]
        assert isinstance(processes_count, int)
        assert processes_count > 0

        assert data[Status.VAR_NETWORK_CONNECTIONS] >= 0
        assert data[Status.VAR_DISK_PARTITIONS] > 0

    def test_load_data(self):
        status = self.status
        assert status.data() == status.default_data()
        assert status.load_data().data() == status.default_data()
        assert status.get_files_count() == 0
        for i in range(1, 100):
            assert status.add_completed()
            assert status.get_completed() == i
            assert status.add_failed()
            assert status.get_failed() == i
            assert status.add_file()
            assert status.get_files_copied() == i
            assert status.add_in_progress()
            assert status.get_in_progress() == 1
            assert status.add_to_be_processed()
            assert status.get_files_to_process() == i
            assert status.set_stopped()
            assert status.get_current_status() == Processing_Status.STOPPED
            assert status.set_started()
            assert status.get_current_status() == Processing_Status.STARTED
            assert status.set_phase_1()
            assert status.get_current_status() == Processing_Status.PHASE_1
            assert status.set_phase_2()
            assert status.get_current_status() == Processing_Status.PHASE_2
        assert json_load_file(status.status_file_path()) == status.data()

    def test_status_file_path(self):
        assert self.status.status_file_path() == path_combine(self.storage.hd2_status(), Status.STATUS_FILE_NAME)
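# To run just this test class on its own (standard pytest invocation against
# the current file; running via the repo's usual test runner works too):
if __name__ == "__main__":
    import pytest
    pytest.main(["-q", __file__])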
class Pre_Processor:

    def __init__(self):
        self.config        = Config()
        self.meta_service  = Metadata_Service()
        self.status        = Status()
        self.storage       = Storage()
        self.file_name     = None      # set in process() method
        self.current_path  = None
        self.base_folder   = None
        self.dst_folder    = None
        self.dst_file_name = None
        self.status.reset()
        #self.analysis_json = Analysis_Json()

    @log_duration
    def clear_data_and_status_folders(self):
        data_target      = self.storage.hd2_data()        # todo: refactor this clean-up into the storage class
        status_target    = self.storage.hd2_status()
        processed_target = self.storage.hd2_processed()
        folder_delete_all(data_target)
        folder_delete_all(status_target)
        folder_delete_all(processed_target)
        folder_create(data_target)
        folder_create(status_target)
        folder_create(processed_target)
        self.status.reset()

    def file_hash(self, file_path):
        return self.meta_service.file_hash(file_path)

    def prepare_folder(self, folder_to_process):
        if folder_to_process.startswith(self.storage.hd1()):
            return folder_to_process
        dirname = os.path.join(self.storage.hd1(), os.path.basename(folder_to_process))
        if os.path.isdir(dirname):
            folder_delete_all(dirname)
        try:
            folder_copy(folder_to_process, dirname)
        finally:
            return dirname      # note: returning from finally suppresses any exception raised by folder_copy

    def process_folder(self, folder_to_process):
        if not os.path.isdir(folder_to_process):
            return False        # todo: add an event log
        folder_to_process = self.prepare_folder(folder_to_process)

        files_count = 0
        for folderName, subfolders, filenames in os.walk(folder_to_process):
            for filename in filenames:
                file_path = os.path.join(folderName, filename)
                if os.path.isfile(file_path):
                    files_count += 1
        self.status.set_files_count(files_count)

        for folderName, subfolders, filenames in os.walk(folder_to_process):
            for filename in filenames:
                file_path = os.path.join(folderName, filename)
                if os.path.isfile(file_path):
                    self.process(file_path)
        return True

    @log_duration
    def process_files(self):
        self.status.StartStatusThread()
        self.status.set_phase_1()
        self.process_folder(self.storage.hd1())
        self.status.set_phase_2()
        self.status.StopStatusThread()

    @log_duration
    def process(self, file_path):
        tik = datetime.now()
        metadata      = self.meta_service.create_metadata(file_path=file_path)
        file_name     = metadata.get_file_name()
        original_hash = metadata.get_original_hash()
        status        = metadata.get_rebuild_status()
        self.update_status(file_name, original_hash, status)
        tok   = datetime.now()
        delta = tok - tik
        if metadata.is_in_todo():
            hash_folder_path = self.storage.hd2_data(original_hash)
            self.meta_service.set_hd1_to_hd2_copy_time(hash_folder_path, delta.total_seconds())
        else:
            self.status.set_not_copied()

    def update_status(self, file_name, original_hash, status):
        if status == FileStatus.INITIAL:
            self.status.add_file()
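# Usage sketch (assumption): a typical pre-processing pass, which hashes every
# file under hd1 into a hash-named folder in hd2/data with a metadata.json entry.
if __name__ == "__main__":
    pre_processor = Pre_Processor()
    pre_processor.clear_data_and_status_folders()   # reset the hd2 data/status/processed folders
    pre_processor.process_files()                   # phase 1: hash + copy everything under hd1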
class Metadata:

    def __init__(self, file_hash=None):
        self.config         = Config()
        self.storage        = Storage()
        self.process_status = Status()
        self.metadata_utils = Metadata_Utils()
        self.path_hd1       = self.storage.hd1()
        self.data           = self.default_data()
        self.file_hash      = file_hash
        #self.time_field =

    def get_from_file(self):                # todo: refactor out this method
        self.load()
        return self.data

    def load(self):
        with open(self.metadata_file_path()) as json_file:
            self.data = json.load(json_file)
        return self

    def add_file(self, file_path):
        if file_exists(file_path):
            tik = datetime.now()
            self.set_file_hash(self.metadata_utils.file_hash(file_path))
            tok   = datetime.now()
            delta = tok - tik
            self.set_file_hash_calculation_time(delta.total_seconds())
            if self.exists():
                self.get_from_file()
            else:
                self.create(file_path)
            self.add_file_path(file_path)
            self.save()
            return self.file_hash

    def add_file_path(self, file_path: str):
        if self.file_hash:
            file_paths = self.data.get('original_file_paths')
            if 0 == len(file_paths):
                self.process_status.add_to_be_processed()
            if file_path.startswith(self.path_hd1):        # check if the path starts with hd1
                file_path = os.path.relpath(file_path, self.path_hd1)
            if file_path not in file_paths:
                file_paths.append(file_path)
            return file_paths

    def create(self, file_path):
        if self.file_hash:
            folder_create(self.metadata_folder_path())
            file_copy(file_path, self.source_file_path())
            self.set_original_file_size(file_path)
            self.set_original_file_extension(file_path)
            self.set_original_file_name(file_path)

    def default_data(self):
        return {'file_name'                      : None,
                'xml_report_status'              : None,
                'last_update_time'               : None,
                'rebuild_server'                 : None,
                'server_version'                 : None,
                'error'                          : None,
                'original_file_paths'            : [],
                'original_hash'                  : None,
                'original_hash_calculation_time' : None,
                'original_file_extension'        : None,
                'original_file_size'             : None,
                'rebuild_file_path'              : None,
                'rebuild_hash'                   : None,
                'rebuild_status'                 : FileStatus.INITIAL,
                'rebuild_file_extension'         : None,
                'rebuild_file_size'              : None,
                'rebuild_file_duration'          : None,
                'f2f_plugin_version'             : None,
                'f2f_plugin_git_commit'          : None,
                'hd1_to_hd2_copy_time'           : None,
                'hd2_to_hd3_copy_time'           : None}

    def delete(self):
        if self.exists():
            folder_delete_all(self.metadata_folder_path())
            return self.exists() is False
        return False

    def exists(self):
        return folder_exists(self.metadata_folder_path())

    def metadata_file_exists(self):
        return file_exists(self.metadata_file_path())

    def metadata_file_path(self):
        if self.file_hash:      # todo: find a better solution than having to add this check to all methods
            return path_combine(self.metadata_folder_path(), DEFAULT_METADATA_FILENAME)

    def metadata_folder_path(self):
        if not self.file_hash:
            return
        path = self.storage.hd2_not_processed(self.file_hash)
        if folder_exists(path):
            return path
        path = self.storage.hd2_processed(self.file_hash)
        if folder_exists(path):
            return path
        # never processed - must be in the 'todo' folder
        path = self.storage.hd2_data(self.file_hash)
        return path

    def is_in_todo(self):
        return folder_exists(self.storage.hd2_data(self.file_hash))

    def is_in_processed(self):
        return folder_exists(self.storage.hd2_processed(self.file_hash))

    def is_in_not_processed(self):
        return folder_exists(self.storage.hd2_not_processed(self.file_hash))

    def save(self):
        if self.exists():
            json_save_file_pretty(python_object=self.data, path=self.metadata_file_path())

    def update_field(self, field, updated_value):   # todo: optimise this if we get performance hits due to multiple updates
        self.data[field] = updated_value
        self.data['last_update_time'] = datetime_now()
        self.save()

    def set_file_hash(self, file_hash):
        self.file_hash                = file_hash
        self.data['original_hash']    = file_hash
        self.data['last_update_time'] = datetime_now()
        if not self.exists():
            self.save()

    def set_file_hash_calculation_time(self, seconds):
        self.data['original_hash_calculation_time'] = seconds

    def set_original_file_name(self, file_path):
        original_file_name = file_name(file_path)
        self.update_field('file_name', original_file_name)

    def set_original_file_size(self, file_path):
        file_size = os.path.getsize(file_path)
        self.update_field('original_file_size', file_size)

    def set_original_file_extension(self, file_path):
        extension = pathlib.Path(file_path).suffix
        self.update_field('original_file_extension', extension)

    def source_file_path(self):
        if self.file_hash:
            return path_combine(self.metadata_folder_path(), DEFAULT_SOURCE_FILENAME)

    def get_original_hash(self):            return self.data.get('original_hash')
    def get_file_hash(self):                return self.file_hash
    def get_file_name(self):                return self.data.get('file_name')
    def get_rebuild_status(self):           return self.data.get('rebuild_status')
    def get_original_file_paths(self):      return self.data.get('original_file_paths')
    def get_last_update_time(self):         return self.data.get('last_update_time')
    def get_error(self):                    return self.data.get('error')
    def get_original_file_extension(self):  return self.data.get('original_file_extension')

    def report_file_path(self):
        if self.file_hash:
            return path_combine(self.metadata_folder_path(), DEFAULT_REPORT_FILENAME)

    def report_file_exists(self):
        return file_exists(self.report_file_path())
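# Usage sketch (assumption): creating and re-loading a metadata entry; the
# file path below is a placeholder.
if __name__ == "__main__":
    metadata  = Metadata()
    file_hash = metadata.add_file("/path/to/hd1/document.pdf")   # hashes the file, copies it into hd2, saves metadata.json
    reloaded  = Metadata(file_hash=file_hash).load()
    print(reloaded.get_rebuild_status())                         # FileStatus.INITIAL for a brand-new entry
    print(reloaded.get_original_file_paths())                    # paths are stored relative to hd1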