def test_get_updated_stats(self):
    """Check that the first stat field follows data flushed to the file.

    The value is expected to be 0 for a freshly created temp file and 5
    after the five bytes ``b"test\\n"`` have been written and flushed.
    """
    # The context manager closes the temp file even when an assertion
    # fails, instead of leaving the handle open until garbage collection.
    with tempfile.NamedTemporaryFile(dir=self.tmp_dir_1, delete=True) as test_file:
        self.assertEqual(FileUtil.get_file_stat(test_file.name)[0], 0)
        test_file.write(b"test\n")
        # flush so the bytes are visible to a stat done through the path
        test_file.flush()
        self.assertEqual(FileUtil.get_file_stat(test_file.name)[0], 5)
def test_create_checksum_file(self):
    """Check that ``create_checksum_file`` writes a ``.done`` companion file.

    The first line of the companion file is expected to be the hash that
    was passed in, one space, and the base name of the source file.
    """
    file_hash = '715a9aa9257aadb001e1b85c858b0a91'
    # The context manager closes the temp file even when an assertion
    # fails, instead of leaving the handle open until garbage collection.
    with tempfile.NamedTemporaryFile(dir=self.tmp_dir_1, delete=True) as test_file:
        test_file.write(b"test\n")
        test_file.flush()
        FileUtil.create_checksum_file(test_file.name, file_hash)
        checksum_path = test_file.name + '.done'
        self.assertTrue(os.path.exists(checksum_path))
        with open(checksum_path) as f:
            line = f.readline()
        self.assertEqual(line.strip(),
                         file_hash + ' ' + os.path.basename(test_file.name))
def test_create_checksum_file(self):
    """Verify the content of the ``.done`` file made by ``create_checksum_file``.

    After the call, ``<source>.done`` must exist and its first line, once
    stripped, must read ``<hash> <source base name>``.
    """
    expected_hash = '715a9aa9257aadb001e1b85c858b0a91'
    # ``with`` guarantees the temp file handle is closed on every exit path;
    # previously it stayed open for the rest of the test run.
    with tempfile.NamedTemporaryFile(dir=self.tmp_dir_1, delete=True) as test_file:
        test_file.write(b"test\n")
        test_file.flush()
        done_file = test_file.name + '.done'
        FileUtil.create_checksum_file(test_file.name, expected_hash)
        self.assertTrue(os.path.exists(done_file))
        with open(done_file) as f:
            line = f.readline()
        self.assertEqual(
            line.strip(),
            expected_hash + ' ' + os.path.basename(test_file.name))
def test_get_file_tenant_and_user_name(self):
    """Run ``get_file_tenant_and_user_name`` against a table of path layouts.

    Every row is (file path, base directory, expected result). Only the
    first row is expected to resolve to a tenant and a user name; the
    others are expected to yield ``(None, None)``.
    """
    cases = (
        ('/opt/edware/home/landing/arrivals/ca/ca_user1/file_drop/xyz.gz.gpg',
         '/opt/edware/home/landing/arrivals',
         ('ca', 'ca_user1')),
        ('/opt/edware/home/landing/arrivals/ca/ca_user1/file_drop/xyz.gz.gpg',
         '/opt/edware/home/landing/arrivals/ca',
         (None, None)),
        ('/opt/edware/home/landing/arrivals/ca/ca_user1/xyz.gz.gpg',
         '/opt/edware/home/landing/arrivals',
         (None, None)),
        ('/opt/edware/home/landing/arrivals/ca/ca_user1/file_drop/xyz.gz.gpg',
         '/tmp',
         (None, None)),
        ('/opt/edware/home/landing/arrivals/ca/xyz.gz.gpg',
         '/opt/edware/home/landing/arrivals',
         (None, None)),
    )
    for file_path, base_dir, expected in cases:
        self.assertEqual(
            FileUtil.get_file_tenant_and_user_name(file_path, base_dir),
            expected)
def move_files_local(self, files_to_move):
    '''
    Move files into the staging directory of their tenant and user.

    Each file is first renamed to ``<name>.partial`` inside the destination
    directory and then renamed to its final name. Any exception raised
    while moving one file is logged and the remaining files are still
    processed.

    :param files_to_move: a list of files to move; it is not modified
    :return: number of files that were moved
    '''
    files_moved = 0
    # Iterate over a sorted copy (shortest path first) so the caller's list
    # is not reordered as a side effect of this call.
    # NOTE(review): the ordering is meant to get checksum files sent before
    # their source files - confirm ascending path length really gives that.
    for file in sorted(files_to_move, key=len):
        self.logger.debug('moving file: ' + file)
        file_tenant_name, file_tenant_user_name = \
            FileUtil.get_file_tenant_and_user_name(
                file,
                os.path.join(self.conf.get(WatcherConst.BASE_DIR),
                             self.conf.get(WatcherConst.SOURCE_DIR)))
        try:
            staging_dir = self.conf.get(WatcherConst.STAGING_DIR)
            destination_dir = os.path.join(staging_dir, file_tenant_name, file_tenant_user_name)
            destination_file = os.path.basename(file)
            destination_partial_file = destination_file + '.partial'
            destination_partial_file_path = os.path.join(destination_dir, destination_partial_file)
            destination_file_path = os.path.join(destination_dir, destination_file)
            os.makedirs(os.path.dirname(destination_file_path), exist_ok=True)
            # two-step rename: the final name only appears once the file
            # is already inside the destination directory
            os.rename(file, destination_partial_file_path)
            os.rename(destination_partial_file_path, destination_file_path)
            files_moved += 1
        except Exception as e:
            self.logger.error('File is failed to move for {file} '
                              'with error {error}'.format(file=file, error=str(e)))
    return files_moved
def test_get_file_last_modified_time_for_valid_file(self):
    """Check that an existing file reports a last-modified time in the past.

    The time is read right after the temp file is created; two seconds
    later the elapsed time since that value must exceed one second.
    """
    # The context manager closes the temp file even when the assertion
    # fails, instead of leaving the handle open until garbage collection.
    with tempfile.NamedTemporaryFile(dir=self.tmp_dir_1, delete=True) as test_file:
        file_last_modified_time = FileUtil.get_file_last_modified_time(
            test_file.name)
    # the file is no longer needed once its modification time is captured
    time.sleep(2)
    self.assertTrue(int(time.time() - file_last_modified_time) > 1)
def move_files_by_sftp(self, files_to_move):
    '''
    Move files via sftp.

    A source file is deleted locally only after its transfer returned
    status 0. A ``RemoteCopyError`` raised for one file is logged and the
    remaining files are still processed.

    :param files_to_move: a list of files to move; it is not modified
    :return: number of files transferred and then deleted locally
    '''
    files_moved = 0
    # Iterate over a sorted copy (shortest path first) so the caller's list
    # is not reordered as a side effect of this call.
    # NOTE(review): the ordering is meant to get checksum files sent before
    # their source files - confirm ascending path length really gives that.
    for file in sorted(files_to_move, key=len):
        self.logger.debug('SFTPing file: ' + file)
        file_tenant_name, file_tenant_user_name = \
            FileUtil.get_file_tenant_and_user_name(
                file,
                os.path.join(self.conf.get(WatcherConst.BASE_DIR),
                             self.conf.get(WatcherConst.SOURCE_DIR)))
        try:
            file_transfer_status = \
                SendFileUtil.remote_transfer_file(
                    source_file=file,
                    hostname=self.conf.get(MoverConst.LANDING_ZONE_HOSTNAME),
                    remote_base_dir=self.conf.get(MoverConst.ARRIVALS_PATH),
                    file_tenantname=file_tenant_name,
                    file_username=file_tenant_user_name,
                    sftp_username=self.conf.get(MoverConst.SFTP_USER),
                    private_key_file=self.conf.get(MoverConst.PRIVATE_KEY_FILE))
            if file_transfer_status == 0:
                self.logger.debug('File transfer was success for {file}'.format(file=file))
                os.remove(file)
                files_moved += 1
                self.logger.debug('Deleted source file {file}'.format(file=file))
            else:
                # a non-zero status used to pass without any trace; the
                # source file is kept, so at least record what happened
                self.logger.warning('File transfer returned status {status} for {file}; '
                                    'source file kept'.format(status=file_transfer_status,
                                                              file=file))
        except RemoteCopyError as e:
            self.logger.error('File transfer failed for {file} '
                              'with error {error}'.format(file=file, error=str(e)))
    return files_moved
def test_get_file_tenant_and_user_name(self):
    """Check tenant/user extraction for one matching and four non-matching paths.

    Only a file under ``<base>/<tenant>/<user>/file_drop/`` with the
    arrivals directory as base is expected to resolve; every other
    combination is expected to give ``(None, None)``.
    """
    resolve = FileUtil.get_file_tenant_and_user_name
    arrivals_dir = '/opt/edware/home/landing/arrivals'
    dropped_file = '/opt/edware/home/landing/arrivals/ca/ca_user1/file_drop/xyz.gz.gpg'
    not_found = (None, None)

    # well-formed drop location below the arrivals directory
    self.assertEqual(resolve(dropped_file, arrivals_dir), ('ca', 'ca_user1'))
    # base directory already contains the tenant level
    self.assertEqual(
        resolve(dropped_file, '/opt/edware/home/landing/arrivals/ca'),
        not_found)
    # file sits directly in the user directory
    self.assertEqual(
        resolve('/opt/edware/home/landing/arrivals/ca/ca_user1/xyz.gz.gpg', arrivals_dir),
        not_found)
    # base directory unrelated to the file path
    self.assertEqual(resolve(dropped_file, '/tmp'), not_found)
    # file sits directly in the tenant directory
    self.assertEqual(
        resolve('/opt/edware/home/landing/arrivals/ca/xyz.gz.gpg', arrivals_dir),
        not_found)
def handle_missing_checksum_files(self):
    """Create checksum files for snapshot entries that have none.

    Every key of ``self.file_stats`` that does not match the checksum file
    extension is treated as a source file. If its complement (checksum)
    file is missing from the snapshot:

    * and missing on the filesystem, a checksum file is created from the
      hasher's hash of the source file and added to the snapshot;
    * but present on the filesystem, the whole file pair is removed from
      the snapshot so it is dropped for the next iteration.
    """
    tracked = self.file_stats.keys()
    checksum_entries = fnmatch.filter(tracked, '*' + Const.CHECKSUM_FILE_EXTENSION)
    # whatever is not a checksum file is a source file to inspect
    for source_path in set(tracked) - set(checksum_entries):
        checksum_path = FileUtil.get_complement_file_name(source_path)
        if checksum_path in tracked:
            # the pair is already complete in the snapshot
            continue
        if os.path.exists(checksum_path):
            # the checksum file is on the filesystem but not in the snapshot
            # (could have been dropped late): drop the pair for this round
            self.remove_file_pair_from_dict(source_path)
            continue
        # no checksum file anywhere: generate one and start tracking it
        source_hash = self.hasher.get_file_hash(source_path)
        created_checksum = FileUtil.create_checksum_file(source_file=source_path,
                                                         file_hash=source_hash)
        self.add_file_to_snapshot(created_checksum)
def test_get_complement_file_name(self):
    """Check that the complement of a file and of its ``.done`` twin map to each other."""
    source_name = self.test_file_1
    done_name = self.test_file_1 + ".done"
    # source file -> checksum file
    self.assertEqual(FileUtil.get_complement_file_name(source_name), done_name)
    # checksum file -> source file
    self.assertEqual(FileUtil.get_complement_file_name(done_name), source_name)
def test_file_contains_hash(self):
    """Check that a generated checksum file is reported to contain the file's hex digest."""
    # only the hex form of the digest is needed here
    hex_digest, _ = get_file_hash(self.test_file_1)
    check_sum_file_path = create_checksum_file(self.test_file_1)
    contains_hash = FileUtil.file_contains_hash(check_sum_file_path, hex_digest)
    self.assertTrue(contains_hash)
def test_get_file_stat_for_empty_file(self):
    """Check that the first stat field of a freshly created temp file is 0."""
    # The context manager closes the temp file even when the assertion
    # fails, instead of leaving the handle open until garbage collection.
    with tempfile.NamedTemporaryFile(dir=self.tmp_dir_1, delete=True) as test_file:
        self.assertEqual(FileUtil.get_file_stat(test_file.name)[0], 0)
def test_get_file_stat_for_invalid_file(self):
    """Check that ``get_file_stat`` yields ``None`` for the path ``/tmp/xyz.gpg``."""
    # presumably this path does not exist on the test machine
    stat_result = FileUtil.get_file_stat('/tmp/xyz.gpg')
    self.assertIsNone(stat_result)
def remove_file_pair_from_dict(self, file):
    """Remove ``file`` and its complement file through ``remove_file_from_dict``.

    The complement file is removed first, then ``file`` itself.
    """
    complement = FileUtil.get_complement_file_name(file)
    for member in (complement, file):
        self.remove_file_from_dict(member)
def test_get_file_stat_for_non_empty_file(self):
    """Check that the first stat field of ``self.test_file_1`` equals 5."""
    stat_result = FileUtil.get_file_stat(self.test_file_1)
    self.assertEqual(stat_result[0], 5)
def add_file_to_snapshot(self, file_path):
    """Store the current stat of ``file_path`` in ``self.file_stats``.

    Nothing is stored when ``file_path`` is falsy or does not exist on the
    filesystem.
    """
    if not file_path or not os.path.exists(file_path):
        return
    self.file_stats[file_path] = FileUtil.get_file_stat(file_path)
def test_file_contains_hash(self):
    """Check ``file_contains_hash`` against a checksum file built for ``self.test_file_1``."""
    digests = get_file_hash(self.test_file_1)
    # the helper returns a pair; the first member is the hex digest
    hex_digest, _unused_digest = digests
    check_sum_file_path = create_checksum_file(self.test_file_1)
    self.assertTrue(
        FileUtil.file_contains_hash(check_sum_file_path, hex_digest))
def _create_checksum(self, source_file):
    '''
    Hash `source_file` with ``self.hasher`` and write its checksum file.

    :param source_file: path of the file to create a checksum file for
    :return: the result of ``FileUtil.create_checksum_file``
    '''
    return FileUtil.create_checksum_file(
        source_file, self.hasher.get_file_hash(source_file))
def test_get_file_last_modified_time_for_invalid_file(self):
    """Check that ``get_file_last_modified_time`` yields ``None`` for ``/tmp/xyz.gpg``."""
    # presumably this path does not exist on the test machine
    modified_time = FileUtil.get_file_last_modified_time('/tmp/xyz.gpg')
    self.assertIsNone(modified_time)
def test_get_complement_file_name(self):
    """Check both directions of the source file / ``.done`` file mapping."""
    complement_of = FileUtil.get_complement_file_name
    expected_pairs = (
        (self.test_file_1, self.test_file_1 + ".done"),
        (self.test_file_1 + ".done", self.test_file_1),
    )
    for given_name, expected_name in expected_pairs:
        self.assertEqual(complement_of(given_name), expected_name)
def get_updated_file_stats(self):
    """Return a new dict mapping every key of ``self.file_stats`` to a fresh stat.

    ``self.file_stats`` itself is left untouched.
    """
    refreshed = {}
    for tracked_name in self.file_stats:
        refreshed[tracked_name] = FileUtil.get_file_stat(tracked_name)
    return refreshed
def test_get_file_last_modified_time_for_valid_file(self):
    """Verify the last-modified time of a fresh temp file lies in the past.

    After a two second pause, the whole seconds elapsed since the reported
    time must be greater than one.
    """
    # ``with`` guarantees the temp file handle is closed on every exit path;
    # previously it stayed open for the rest of the test run.
    with tempfile.NamedTemporaryFile(dir=self.tmp_dir_1, delete=True) as test_file:
        file_last_modified_time = FileUtil.get_file_last_modified_time(test_file.name)
        time.sleep(2)
        elapsed_seconds = int(time.time() - file_last_modified_time)
        self.assertTrue(elapsed_seconds > 1)
def verify_source_file_check_sum(self, source_file, checksum_file):
    """Tell whether ``checksum_file`` contains the hasher's hash of ``source_file``.

    :param source_file: path of the file to hash with ``self.hasher``
    :param checksum_file: path of the file searched for that hash
    :return: the result of ``FileUtil.file_contains_hash``
    """
    return FileUtil.file_contains_hash(
        checksum_file, self.hasher.get_file_hash(source_file))
def valid_check_sum(self, source_file):
    """Validate ``source_file`` against its complement (checksum) file.

    :param source_file: path of the file to validate
    :return: ``False`` when the source file or its checksum file does not
        exist, otherwise the result of ``verify_source_file_check_sum``
    """
    checksum_file = FileUtil.get_complement_file_name(source_file)
    both_present = os.path.exists(source_file) and os.path.exists(checksum_file)
    if both_present:
        return self.verify_source_file_check_sum(source_file, checksum_file)
    return False