def hd2_metadatas(self):
    """Collect the metadata payload of every file hash found on hd2.

    Every hash returned by ``self.hd2_file_hashes()`` is loaded through
    ``Metadata``; hashes whose metadata does not exist are left out.

    Returns:
        list: the ``data`` attribute of each existing metadata, in the
        order the hashes were returned.
    """
    # function-level import, kept as in the original
    # (presumably avoids a circular import -- TODO confirm)
    from cdr_plugin_folder_to_folder.metadata.Metadata import Metadata

    # lazy generator: each metadata is loaded, checked and collected one at
    # a time, exactly like an explicit for-loop would do
    loaded = (Metadata(file_hash=entry).load()
              for entry in self.hd2_file_hashes())
    return [item.data for item in loaded if item.exists()]
def test_get_metadata_file_path(self):
    """The service must resolve the same metadata file path as ``Metadata``.

    A metadata entry is created for ``self.test_file``, its folder is handed
    to the service, and both objects must then report the same file path.
    """
    self.test_metadata = Metadata()
    self.test_metadata.add_file(self.test_file)
    try:
        self.metadata_service.metadata_folder = \
            self.test_metadata.metadata_folder_path()
        assert self.metadata_service.get_metadata_file_path() == \
            self.test_metadata.metadata_file_path()
    finally:
        # delete the metadata created above even when the assertion fails,
        # so a failing run does not leave data behind for later tests
        self.test_metadata.delete()
def create_metadata(self, file_path):
    """Create metadata for ``file_path``, send it to elastic and log it.

    Args:
        file_path: path of the file that is added to a new ``Metadata``.

    Returns:
        The new ``Metadata`` object (also stored on ``self.metadata``).
    """
    self.metadata = created = Metadata()
    created.add_file(file_path)
    self.metadata_elastic.add_metadata(created.data)  # save metadata to elastic

    # message first, details second: same evaluation order as a single call
    log_message = f"created metadata for: {created.get_file_name()}"
    log_details = {
        "file_path": file_path,
        "metadata_file_path": created.metadata_file_path()
    }
    log_info(message=log_message, data=log_details)
    return created
class test_Metadata_Elastic(Temp_Config):
    """Tests for ``Metadata_Elastic``, driven through a ``Metadata_Service``.

    The whole class is skipped when the configured ``Metadata_Elastic``
    reports ``enabled is False``.
    """

    test_file = None

    @classmethod
    def setUpClass(cls) -> None:
        """Create the shared test file and the elastic/service objects."""
        super().setUpClass()
        cls.test_file = temp_file(
            contents='Static text so that we have a static hash')
        cls.file_hash = '500286533bf75d769e9180a19414d1c3502dd52093e7351a0a9b1385d8f8961c'
        cls.metadata_elastic = Metadata_Elastic()
        Setup_Testing().configure_metadata_elastic(cls.metadata_elastic)
        cls.metadata_service = Metadata_Service()
        cls.metadata_service.metadata_elastic = cls.metadata_elastic
        if cls.metadata_elastic.enabled is False:
            # tearDownClass is not invoked when setUpClass raises (a skip is
            # raised as an exception too), so release what was set up above
            # before skipping
            cls.tearDownClass()
            pytest.skip('Elastic server not available')

    @classmethod
    def tearDownClass(cls) -> None:
        """Tear down the parent fixture and remove the shared test file."""
        super().tearDownClass()
        file_delete(cls.test_file)

    def setUp(self) -> None:
        """Start every test with empty hd1 and hd2 storage."""
        self.storage.hd1_delete_all_files()
        self.storage.hd2_delete_all_files()
        # assert cls.test_metadata.exists()
        #self.test_metadata_folder = self.test_metadata.metadata_folder_path()
        #Pre_Processor().clear_data_and_status_folders()

    def test_add_metadata(self):
        """Add, read back and delete one metadata document in elastic."""
        metadata = self.metadata_service.create_metadata(
            file_path=self.test_file)
        metadata_data = metadata.data
        original_hash = metadata.get_original_hash()

        result_add_metadata = self.metadata_elastic.add_metadata(metadata_data)
        assert original_hash == self.file_hash
        assert result_add_metadata.get('_shards').get('successful') == 1

        elastic_metadata = self.metadata_elastic.get_metadata(
            original_hash=original_hash)
        # the time field is not part of the local metadata data, so it is
        # dropped before comparing
        del elastic_metadata[self.metadata_elastic.time_field]
        assert elastic_metadata == metadata_data

        assert self.metadata_elastic.delete_metadata(
            original_hash=original_hash).get('result') == 'deleted'
        assert self.metadata_elastic.get_metadata(
            original_hash=original_hash) == {}

    def test_delete_all_metadata(self):
        """After deleting everything, no metadata must be returned."""
        self.metadata_elastic.delete_all_metadata()
        assert len(self.metadata_elastic.get_all_metadata()) == 0

    def test_reload_metadatas(self):
        """``reload_metadatas`` must report the number of files added."""
        count = 2
        self.add_test_files(count=count, execute_stage_1=True)
        assert self.metadata_elastic.reload_metadatas() == count

    def test_reset_elastic_data(self):
        """``reload_elastic_data`` must report the reset and reload count."""
        hash_json = Hash_Json()
        count = 10  # use 1000
        text_size = 500  # use 50000
        self.add_test_files(count=count,
                            text_size=text_size,
                            execute_stage_1=True)

        message = self.metadata_elastic.reload_elastic_data()
        assert message == f'Elastic files_metadata has been reset and {count} metadata items reloaded'
        assert len(hash_json.load()) == count

    def test_reload_hash_json(self):
        """``reload_hash_json`` must populate the hash json data."""
        hash_json = Hash_Json()
        count = 10  # use 1000
        text_size = 500  # use 50000
        self.add_test_files(count=count,
                            text_size=text_size,
                            execute_stage_1=True)

        assert len(hash_json.data()) == 0
        self.metadata_elastic.reload_hash_json()
        assert len(hash_json.data()) > 0

    # @log_duration
    # def reload_data_from_hd2(self):
    #     return self.storage.hd2_metadatas()
    #
    # def test_reload_data_from_hd2(self):
    #     self.add_test_files(count=1, execute_stage_1=True)
    #     result = self.status.reload_data_from_hd2()
    #     pprint(result)
    #
    #     process_all_log_entries_and_end_logging_process()

    def test_get_from_file(self):
        """Metadata read back from its folder must hold the initial values."""
        metadata = self.metadata_service.create_metadata(
            file_path=self.test_file)
        try:
            metadata_data = self.metadata_service.get_from_file(
                metadata.metadata_folder_path()).data
            #assert self.metadata_service.metadata_folder == self.test_metadata_folder

            # the update time changes on every run, so it is neutralised
            # before the comparison
            metadata_data['last_update_time'] = None
            assert metadata_data == {
                'file_name': file_name(self.test_file),
                'xml_report_status': None,
                'last_update_time': None,
                'rebuild_server': None,
                'server_version': None,
                'error': None,
                'original_file_paths': [self.test_file],
                'original_hash': self.file_hash,
                'original_hash_calculation_time': metadata_data['original_hash_calculation_time'],
                'original_file_extension': '.tmp',
                'original_file_size': 41,
                'rebuild_file_path': None,
                'rebuild_hash': None,
                'rebuild_status': FileStatus.INITIAL,
                'rebuild_file_extension': None,
                'rebuild_file_size': None,
                'rebuild_file_duration': None,
                'f2f_plugin_version': None,
                'f2f_plugin_git_commit': None,
                'hd1_to_hd2_copy_time': None,
                'hd2_to_hd3_copy_time': None
            }
        finally:
            # remove the metadata even when an assertion above fails
            metadata.delete()

    def test_get_metadata_file_path(self):
        """The service must resolve the same file path as ``Metadata``."""
        self.test_metadata = Metadata()
        self.test_metadata.add_file(self.test_file)
        try:
            self.metadata_service.metadata_folder = \
                self.test_metadata.metadata_folder_path()
            assert self.metadata_service.get_metadata_file_path() == \
                self.test_metadata.metadata_file_path()
        finally:
            # remove the metadata even when the assertion above fails
            self.test_metadata.delete()

    def test_setup(self):
        """After ``setup`` the index must be present in the index list."""
        self.metadata_elastic.setup()
        assert self.metadata_elastic.index_name in \
            self.metadata_elastic.elastic().elastic().index_list()
def setUp(self) -> None:
    """Give each test the expected file hash and fresh helper objects."""
    expected_hash = 'b94f6f125c79e3a5ffaa826f584c10d52ada669e6762051b826b55776d05aed2'
    self.file_hash = expected_hash
    self.metadata = Metadata()
    self.metadata_utils = Metadata_Utils()
class test_Metadata(TestCase):
    """Tests for ``Metadata``: adding files, recording paths and deleting."""

    file_path = None
    file_copy_path = None

    @classmethod
    def setUpClass(cls) -> None:
        """Create the test file and a copy of it under a different name."""
        cls.file_path = temp_file(contents='some text')  # test file
        cls.file_copy_path = cls.file_path + '_an_copy'  # create a copy to test adding multiple files
        file_copy(cls.file_path, cls.file_copy_path)

    @classmethod
    def tearDownClass(cls) -> None:
        """Remove both files created in ``setUpClass``."""
        file_delete(cls.file_path)
        file_delete(cls.file_copy_path)

    def setUp(self) -> None:
        """Give each test the expected file hash and fresh helper objects."""
        self.file_hash = 'b94f6f125c79e3a5ffaa826f584c10d52ada669e6762051b826b55776d05aed2'
        self.metadata = Metadata()
        self.metadata_utils = Metadata_Utils()

    def test_add_file(self):
        """Add the same content four times and check the recorded paths."""
        metadata = self.metadata
        assert metadata.exists() is False  # metadata folder doesn't exist

        try:
            # adding file first time
            assert metadata.add_file(self.file_path) == self.file_hash  # add file and get file hash as return value
            assert metadata.exists() is True  # confirm metadata folder now exists
            assert folder_exists(metadata.metadata_folder_path())  # confirm metadata folder now exists
            assert file_exists(metadata.metadata_file_path())  # confirm metadata json file exists
            assert file_exists(metadata.source_file_path())  # confirm source file was correctly put in place
            assert metadata.file_hash == self.metadata_utils.file_hash(
                metadata.source_file_path())  # confirm hash of source file matches hash of file_path
            assert metadata.metadata_file_path() == path_combine(
                metadata.metadata_folder_path(),
                DEFAULT_METADATA_FILENAME)  # confirm metadata file is placed in correct location
            file_paths = metadata.data.get('original_file_paths')
            assert file_paths == [self.file_path]  # confirms that in this mode the entire path is preserved

            # adding same file 2nd time (with same hash and same name)
            assert metadata.add_file(self.file_path) == self.file_hash  # adding the same file again
            file_paths = metadata.data.get('original_file_paths')
            assert file_paths == [self.file_path]  # should not impact this value (same as above)

            # adding same file 3rd time (with same hash but different name)
            assert metadata.add_file(self.file_copy_path) == self.file_hash  # adding the same file again (with different name)
            file_paths = metadata.data.get('original_file_paths')
            assert file_paths == [self.file_path, self.file_copy_path]  # will make the new file path be added

            # adding same file 4th time (with self.path_hd1 set to parent folder of path)
            file_parent_folder = parent_folder(self.file_path)  # get parent folder of test file
            metadata.path_hd1 = file_parent_folder  # assign it to the metadata variable used to calculate virtual paths
            assert metadata.add_file(self.file_path) == self.file_hash
            file_paths = metadata.data.get('original_file_paths')
            assert file_paths == [
                self.file_path, self.file_copy_path,
                file_name(self.file_path)
            ]  # confirm that the virtual file path was added as the 3rd item (in this case the file name)

            # clean up
            assert metadata.delete() is True
            assert folder_not_exists(metadata.metadata_folder_path())
        finally:
            # when an assertion above fails before the clean up is reached,
            # still remove the metadata so the next run starts from scratch
            if metadata.exists():
                metadata.delete()

    def test_add_file_path(self):
        """Paths are only recorded once ``file_hash`` has been set."""
        test_path_1 = path_combine(self.metadata.path_hd1, 'aaaa.txt')
        test_path_2 = path_combine(self.metadata.path_hd1, 'bbbb/ccc.txt')
        test_path_3 = 'dddd/eeee.txt'
        test_path_4 = '/fff/gggg.txt'

        # the assertions below reuse this reference after each add_file_path
        # call, i.e. they rely on the list being updated in place
        file_paths = self.metadata.data.get('original_file_paths')
        assert file_paths == []

        self.metadata.add_file_path(test_path_1)
        assert file_paths == []  # nothing is added while file_hash is not set

        self.metadata.file_hash = 'this value needs to be set for .add_file_path to work'
        self.metadata.add_file_path(test_path_1)
        self.metadata.add_file_path(test_path_2)
        self.metadata.add_file_path(test_path_3)
        self.metadata.add_file_path(test_path_4)
        assert file_paths == [
            'aaaa.txt', 'bbbb/ccc.txt', 'dddd/eeee.txt', '/fff/gggg.txt'
        ]

    def test_delete(self):
        """Deleting a metadata that had no file added must return False."""
        assert self.metadata.delete() is False

    def test_metadata_file_path(self):
        """Without a file added, ``metadata_folder_path`` must be None."""
        assert self.metadata.metadata_folder_path() is None
def get_from_file(self, metadata_folder):
    """Load the metadata that lives in ``metadata_folder``.

    The base name of the folder is passed to ``Metadata`` as its first
    argument and the metadata is then read via ``Metadata.get_from_file``.

    Args:
        metadata_folder: path of the metadata folder; remembered on
            ``self.metadata_folder``.

    Returns:
        The loaded ``Metadata`` object (also stored on ``self.metadata``).
    """
    folder_name = os.path.basename(metadata_folder)
    loaded = Metadata(folder_name)
    self.metadata = loaded
    loaded.get_from_file()
    self.metadata_folder = metadata_folder
    return loaded
class Metadata_Service:
    """Create, load and update file metadata, mirroring changes to elastic.

    Most methods take a ``metadata_folder``: the metadata is reloaded from
    that folder (see ``get_from_file``) before it is read or updated.
    """

    # file name used by get_metadata_file_path
    METADATA_FILE_NAME = "metadata.json"

    def __init__(self):
        """Start with no metadata loaded; set up config and elastic access."""
        self.file_path = None
        self.metadata_folder = None
        self.metadata = None
        self.config = Config()
        self.metadata_elastic = Metadata_Elastic().setup()

    def create_metadata(self, file_path):
        """Create metadata for ``file_path``, send it to elastic and log it.

        Returns the new ``Metadata`` object (also stored on ``self.metadata``).
        """
        self.metadata = created = Metadata()
        created.add_file(file_path)
        self.metadata_elastic.add_metadata(created.data)  # save metadata to elastic

        # message first, details second: same evaluation order as one call
        log_message = f"created metadata for: {created.get_file_name()}"
        log_details = {
            "file_path": file_path,
            "metadata_file_path": created.metadata_file_path()
        }
        log_info(message=log_message, data=log_details)
        return created

    def get_from_file(self, metadata_folder):
        """Load the metadata stored in ``metadata_folder`` and return it.

        The folder's base name is handed to ``Metadata``; the folder itself
        is remembered on ``self.metadata_folder``.
        """
        folder_name = os.path.basename(metadata_folder)
        loaded = Metadata(folder_name)
        self.metadata = loaded
        loaded.get_from_file()
        self.metadata_folder = metadata_folder
        return loaded

    def get_metadata_file_path(self):
        """Return ``METADATA_FILE_NAME`` joined onto ``self.metadata_folder``."""
        json_name = Metadata_Service.METADATA_FILE_NAME
        return os.path.join(self.metadata_folder, json_name)

    def file_hash(self, file_path):
        """Return ``file_sha256(file_path)``."""
        digest = file_sha256(file_path)
        return digest

    def get_original_file_paths(self, metadata_folder):
        """Reload the metadata and return its original file paths."""
        self.get_from_file(metadata_folder)
        return self.metadata.get_original_file_paths()

    def get_status(self, metadata_folder):
        """Reload the metadata and return its rebuild status."""
        self.get_from_file(metadata_folder)
        return self.metadata.get_rebuild_status()

    def is_completed_status(self, metadata_folder):
        """Tell whether the rebuild status equals ``FileStatus.COMPLETED``."""
        current_status = self.get_status(metadata_folder)
        return current_status == FileStatus.COMPLETED

    def set_status_inprogress(self, metadata_folder):
        """Set the rebuild status to ``FileStatus.IN_PROGRESS``."""
        self.set_status(metadata_folder, FileStatus.IN_PROGRESS)

    def set_metadata_field(self, metadata_folder, field_name, value):
        """Reload the metadata, update one field and send it to elastic.

        Every ``set_*`` method of this class goes through this method.
        """
        self.get_from_file(metadata_folder)
        self.metadata.update_field(field_name, value)
        updated_data = self.metadata.data
        self.metadata_elastic.add_metadata(updated_data)  # save metadata to elastic

    def set_status(self, metadata_folder, rebuild_status):
        """Store ``rebuild_status`` in the 'rebuild_status' field."""
        self.set_metadata_field(metadata_folder, 'rebuild_status',
                                rebuild_status)

    def set_error(self, metadata_folder, error_details):
        """Store ``error_details`` in the 'error' field."""
        self.set_metadata_field(metadata_folder, 'error', error_details)

    def set_xml_report_status(self, metadata_folder, xml_report_status):
        """Store ``xml_report_status`` in the 'xml_report_status' field."""
        self.set_metadata_field(metadata_folder, 'xml_report_status',
                                xml_report_status)

    def set_rebuild_server(self, metadata_folder, rebuild_server):
        """Store ``rebuild_server`` in the 'rebuild_server' field."""
        self.set_metadata_field(metadata_folder, 'rebuild_server',
                                rebuild_server)

    def set_server_version(self, metadata_folder, server_version):
        """Store ``server_version`` in the 'server_version' field."""
        self.set_metadata_field(metadata_folder, 'server_version',
                                server_version)

    def set_rebuild_file_path(self, metadata_folder, rebuild_file_path):
        """Store ``rebuild_file_path`` in the 'rebuild_file_path' field."""
        self.set_metadata_field(metadata_folder, 'rebuild_file_path',
                                rebuild_file_path)

    def set_rebuild_hash(self, metadata_folder, rebuild_hash):
        """Store ``rebuild_hash`` in the 'rebuild_hash' field."""
        self.set_metadata_field(metadata_folder, 'rebuild_hash', rebuild_hash)

    def set_rebuild_file_size(self, metadata_folder, file_size):
        """Store ``file_size`` in the 'rebuild_file_size' field."""
        self.set_metadata_field(metadata_folder, 'rebuild_file_size',
                                file_size)

    def set_rebuild_file_extension(self, metadata_folder, file_extension):
        """Store ``file_extension`` in the 'rebuild_file_extension' field."""
        self.set_metadata_field(metadata_folder, 'rebuild_file_extension',
                                file_extension)

    def set_rebuild_file_duration(self, metadata_folder,
                                  rebuild_file_duration):
        """Store ``rebuild_file_duration`` in the 'rebuild_file_duration' field."""
        self.set_metadata_field(metadata_folder, 'rebuild_file_duration',
                                rebuild_file_duration)

    def set_f2f_plugin_version(self, metadata_folder, rebuild_file_duration):
        """Store the given value in the 'f2f_plugin_version' field.

        NOTE(review): the value parameter is named ``rebuild_file_duration``,
        which looks copy-pasted; it is kept so keyword callers keep working.
        """
        self.set_metadata_field(metadata_folder, 'f2f_plugin_version',
                                rebuild_file_duration)

    def set_f2f_plugin_git_commit(self, metadata_folder,
                                  rebuild_file_duration):
        """Store the given value in the 'f2f_plugin_git_commit' field.

        NOTE(review): the value parameter is named ``rebuild_file_duration``,
        which looks copy-pasted; it is kept so keyword callers keep working.
        """
        self.set_metadata_field(metadata_folder, 'f2f_plugin_git_commit',
                                rebuild_file_duration)

    def set_hd1_to_hd2_copy_time(self, metadata_folder, seconds):
        """Store ``seconds`` in the 'hd1_to_hd2_copy_time' field."""
        self.set_metadata_field(metadata_folder, 'hd1_to_hd2_copy_time',
                                seconds)

    def set_hd2_to_hd3_copy_time(self, metadata_folder, seconds):
        """Store ``seconds`` in the 'hd2_to_hd3_copy_time' field."""
        self.set_metadata_field(metadata_folder, 'hd2_to_hd3_copy_time',
                                seconds)
def test_process_file(self):
    """Processing the test file must make its metadata exist."""
    tracked = Metadata(file_hash=self.file_hash)

    # nothing exists for this hash before the pre-processor runs
    assert tracked.exists() is False

    self.pre_processor.process(self.test_file)

    assert tracked.exists() is True
def tearDownClass(cls) -> None:
    """Remove the test file, the temp folder and the test file's metadata."""
    file_delete(cls.test_file)
    folder_delete_all(cls.temp_dir)

    # metadata is looked up by the hash stored on the class
    leftover = Metadata(file_hash=cls.file_hash)
    leftover.delete()