def test_log_collector_should_truncate_large_text_files_and_ignore_large_binary_files(self):
    # With the per-file size limit lowered to SMALL_FILE_SIZE, some files no longer fit in full.
    # The test expects waagent.log.1 to be collected under its truncated path and waagent.log.3.gz
    # (a binary file, which cannot be truncated) to be left out of the archive.
    root = self.root_collect_dir

    with patch("azurelinuxagent.common.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE):
        collector = LogCollector()
        archive = collector.collect_logs_and_get_archive()

    self._assert_archive_created(archive)

    # The oversized text log is looked up under its truncated path, not its original one.
    truncated_log = self._truncated_path(os.path.join(root, "waagent.log.1"))
    collected = [
        os.path.join(root, "waagent.log"),
        truncated_log,
        os.path.join(root, "waagent.log.2.gz"),
        os.path.join(root, "less_important_file"),
        os.path.join(root, "another_dir", "least_important_file"),
    ]
    # Binary files cannot be truncated, so the oversized one must be skipped.
    skipped = [os.path.join(root, "waagent.log.3.gz")]

    self._assert_files_are_in_archive(collected)
    self._assert_files_are_not_in_archive(skipped)

    file_count = self._get_number_of_files_in_archive()
    self.assertEqual(5, file_count, "Expected 5 files in archive, found {0}!".format(file_count))
def collect_logs(self, is_full_mode):
    """
    Run a log collection and report the outcome on stdout.

    Prints which mode is used (full when is_full_mode is truthy, normal otherwise), builds a
    LogCollector for that mode and asks it for the archive. On success the archive location and
    OUTPUT_RESULTS_FILE_PATH are printed; on any exception the error and OUTPUT_RESULTS_FILE_PATH
    are printed and the process exits with status 1.
    """
    mode_banner = "Running log collector mode full" if is_full_mode else "Running log collector mode normal"
    print(mode_banner)

    try:
        collector = LogCollector(is_full_mode)
        archive_path = collector.collect_logs_and_get_archive()
        success_message = ("Log collection successfully completed. Archive can be found at {0} "
                           "and detailed log output can be found at {1}").format(archive_path,
                                                                                  OUTPUT_RESULTS_FILE_PATH)
        print(success_message)
    except Exception as e:
        # Top-level boundary: report the failure to the user and signal it through the exit status.
        failure_message = "Log collection completed unsuccessfully. Error: {0}".format(ustr(e))
        print(failure_message)
        print("Detailed log output can be found at {0}".format(OUTPUT_RESULTS_FILE_PATH))
        sys.exit(1)
def test_log_collector_parses_commands_in_manifest(self):
    # Ensure familiar commands are parsed and unknowns are ignored (like diskinfo and malformed entries)
    file_to_collect = os.path.join(self.root_collect_dir, "waagent.log")
    folder_to_list = self.root_collect_dir

    # One manifest entry per line. Spelled with explicit newlines so the content does not depend on
    # how this method is indented.
    manifest_content = (
        "\n"
        "echo,### Test header ###\n"
        "unknown command\n"
        "ll,{0}\n"
        "copy,{1}\n"
        "diskinfo,"
    ).format(folder_to_list, file_to_collect)

    manifest_file_path = os.path.join(self.tmp_dir, "manifest")
    write_file(manifest_file_path, manifest_content)

    lc = LogCollector(manifest_file_path)
    archive = lc.collect_logs()

    # The collector's detailed output is inspected line by line below.
    with open(self.output_results_file_path, "r") as fh:
        results = fh.readlines()

    # Assert echo was parsed
    self.assertTrue(any(line.endswith("### Test header ###\n") for line in results))

    # Assert unknown command was reported
    self.assertTrue(any(line.endswith("ERROR Couldn\'t parse \"unknown command\"\n") for line in results))

    # Assert ll was parsed
    self.assertTrue(any("ls -alF {0}".format(folder_to_list) in line for line in results))

    # Assert copy was parsed
    self._assert_archive_created(archive)
    self._assert_files_are_in_archive(expected_files=[file_to_collect])

    # assertEqual replaces the deprecated assertEquals alias, which was removed in Python 3.12.
    no_files = self._get_number_of_files_in_archive()
    self.assertEqual(1, no_files, "Expected 1 file in archive, found {0}!".format(no_files))
def test_log_collector_parses_commands_in_manifest(self):
    # Known manifest commands (echo, ll, copy) must be honoured, while entries the collector does not
    # understand (diskinfo, malformed lines) must not break the collection.
    listed_folder = self.root_collect_dir
    copied_file = os.path.join(listed_folder, "waagent.log")

    # One manifest entry per line.
    manifest = (
        "\n"
        "echo,### Test header ###\n"
        "unknown command\n"
        "ll,{0}\n"
        "copy,{1}\n"
        "diskinfo,"
    ).format(listed_folder, copied_file)

    with patch("azurelinuxagent.common.logcollector.MANIFEST_NORMAL", manifest):
        collector = LogCollector()
        archive = collector.collect_logs_and_get_archive()

    with open(self.output_results_file_path, "r") as results_file:
        output_lines = results_file.readlines()

    # echo: the header text must end one of the output lines
    echo_seen = any(line.endswith("### Test header ###\n") for line in output_lines)
    self.assertTrue(echo_seen)

    # malformed entry: a parse error must have been reported for it
    parse_error_seen = any(
        line.endswith("ERROR Couldn\'t parse \"unknown command\"\n") for line in output_lines)
    self.assertTrue(parse_error_seen)

    # ll: the directory listing command must show up in the output
    listing_command = "ls -alF {0}".format(listed_folder)
    self.assertTrue(any(listing_command in line for line in output_lines))

    # copy: the requested file, and only that file, must be in the archive
    self._assert_archive_created(archive)
    self._assert_files_are_in_archive(expected_files=[copied_file])

    file_count = self._get_number_of_files_in_archive()
    self.assertEqual(1, file_count, "Expected 1 file in archive, found {0}!".format(file_count))
def test_log_collector_uses_full_manifest_when_full_mode_enabled(self):
    # MANIFEST_FULL is replaced with a manifest that copies a single file; a collector created with
    # is_full_mode=True is then expected to archive exactly that file.
    copied_file = os.path.join(self.root_collect_dir, "less_important_file")

    # One manifest entry per line.
    full_manifest = (
        "\n"
        "echo,### Test header ###\n"
        "copy,{0}\n"
    ).format(copied_file)

    with patch("azurelinuxagent.common.logcollector.MANIFEST_FULL", full_manifest):
        collector = LogCollector(is_full_mode=True)
        archive = collector.collect_logs_and_get_archive()

    self._assert_archive_created(archive)
    self._assert_files_are_in_archive(expected_files=[copied_file])

    file_count = self._get_number_of_files_in_archive()
    self.assertEqual(1, file_count, "Expected 1 file in archive, found {0}!".format(file_count))
def collect_logs(self, is_full_mode):
    """
    Run a log collection and report the outcome on stdout.

    When CollectLogsHandler.should_validate_cgroups() is true, the cpu and memory cgroup paths of
    the current process are checked first: both must contain cgroupconfigurator.LOGCOLLECTOR_SLICE,
    otherwise the mismatch is printed and the process exits with
    logcollector.INVALID_CGROUPS_ERRCODE.

    The collection itself prints the archive location and OUTPUT_RESULTS_FILE_PATH on success; on
    any exception it prints the error and OUTPUT_RESULTS_FILE_PATH and exits with status 1.

    :param is_full_mode: truthy to run the collector in full mode, falsy for normal mode.
    """
    if is_full_mode:
        print("Running log collector mode full")
    else:
        print("Running log collector mode normal")

    # Check the cgroups unit
    if CollectLogsHandler.should_validate_cgroups():
        cpu_cgroup_path, memory_cgroup_path = SystemdCgroupsApi.get_process_cgroup_relative_paths("self")

        # A missing path counts as a mismatch. Without the None guard, the membership test raises
        # TypeError outside the try block below, so the process would die with a traceback instead
        # of exiting with INVALID_CGROUPS_ERRCODE.
        # NOTE(review): presumably a path is None when the process is not in a cgroup for that
        # controller - confirm against SystemdCgroupsApi.
        cpu_slice_matches = (cpu_cgroup_path is not None
                             and cgroupconfigurator.LOGCOLLECTOR_SLICE in cpu_cgroup_path)
        memory_slice_matches = (memory_cgroup_path is not None
                                and cgroupconfigurator.LOGCOLLECTOR_SLICE in memory_cgroup_path)

        if not cpu_slice_matches or not memory_slice_matches:
            print("The Log Collector process is not in the proper cgroups:")
            if not cpu_slice_matches:
                print("\tunexpected cpu slice")
            if not memory_slice_matches:
                print("\tunexpected memory slice")

            sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)

    try:
        log_collector = LogCollector(is_full_mode)
        archive = log_collector.collect_logs_and_get_archive()
        print("Log collection successfully completed. Archive can be found at {0} "
              "and detailed log output can be found at {1}".format(archive, OUTPUT_RESULTS_FILE_PATH))
    except Exception as e:
        # Top-level boundary: report the failure to the user and signal it through the exit status.
        print("Log collection completed unsuccessfully. Error: {0}".format(ustr(e)))
        print("Detailed log output can be found at {0}".format(OUTPUT_RESULTS_FILE_PATH))
        sys.exit(1)
def test_log_collector_should_collect_all_files(self):
    # All files in the manifest should be collected, since none of them are over the individual file size limit,
    # and combined they do not cross the archive size threshold.
    lc = LogCollector(self.manifest_path)
    archive = lc.collect_logs()

    self._assert_archive_created(archive)

    expected_files = [
        os.path.join(self.root_collect_dir, "waagent.log"),
        os.path.join(self.root_collect_dir, "waagent.log.1"),
        os.path.join(self.root_collect_dir, "waagent.log.2.gz"),
        os.path.join(self.root_collect_dir, "waagent.log.3.gz"),
        os.path.join(self.root_collect_dir, "less_important_file"),
        os.path.join(self.root_collect_dir, "another_dir", "least_important_file"),
    ]
    self._assert_files_are_in_archive(expected_files)

    # assertEqual replaces the deprecated assertEquals alias, which was removed in Python 3.12.
    no_files = self._get_number_of_files_in_archive()
    self.assertEqual(6, no_files, "Expected 6 files in archive, found {0}!".format(no_files))
def test_log_collector_should_clean_up_uncollected_truncated_files(self):
    # A truncated copy produced by an earlier run must not linger: once the current run no longer
    # collects it, it is expected to be gone from the archive and from the truncated files directory.
    root = self.root_collect_dir

    # Priority patterns, highest priority first.
    must_collect_files = [os.path.join(root, "waagent*")]

    def collect_with_tight_limits():
        # Each collection uses a fresh LogCollector under the same three limits: the archive is capped
        # at twice SMALL_FILE_SIZE, single files at SMALL_FILE_SIZE, and the priority list is replaced,
        # so that only two files fit, one of which has to be truncated.
        with patch("azurelinuxagent.common.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT", 2 * SMALL_FILE_SIZE):
            with patch("azurelinuxagent.common.logcollector._MUST_COLLECT_FILES", must_collect_files):
                with patch("azurelinuxagent.common.logcollector._FILE_SIZE_LIMIT", SMALL_FILE_SIZE):
                    collector = LogCollector()
                    return collector.collect_logs_and_get_archive()

    # First run: waagent.log in full plus the truncated copy of waagent.log.1.
    archive = collect_with_tight_limits()
    self._assert_archive_created(archive)

    first_run_files = [
        os.path.join(root, "waagent.log"),
        self._truncated_path(os.path.join(root, "waagent.log.1")),
    ]
    self._assert_files_are_in_archive(first_run_files)

    file_count = self._get_number_of_files_in_archive()
    self.assertEqual(2, file_count, "Expected 2 files in archive, found {0}!".format(file_count))

    # Second run: with the original waagent.log.1 removed from disk, its truncated copy is no longer
    # needed and must vanish from both the archive and the filesystem.
    rm_files(os.path.join(root, "waagent.log.1"))
    second_archive = collect_with_tight_limits()

    second_run_files = [
        os.path.join(root, "waagent.log"),
        os.path.join(root, "waagent.log.2.gz"),
    ]
    stale_files = [self._truncated_path(os.path.join(root, "waagent.log.1"))]
    self._assert_files_are_in_archive(second_run_files)
    self._assert_files_are_not_in_archive(stale_files)
    self._assert_archive_created(second_archive)

    file_count = self._get_number_of_files_in_archive()
    self.assertEqual(2, file_count, "Expected 2 files in archive, found {0}!".format(file_count))

    leftover_truncated = os.listdir(self.truncated_files_dir)
    self.assertEqual(0, len(leftover_truncated),
                     "Uncollected truncated file waagent.log.1 should have been deleted!")
def test_log_collector_should_update_archive_when_files_are_new_or_modified_or_deleted(self):
    # The archive must mirror the disk at collection time: a modified file is refreshed, a removed
    # file is dropped and a newly created file is added.
    root = self.root_collect_dir
    agent_log = os.path.join(root, "waagent.log")
    rotated_log = os.path.join(root, "waagent.log.1")
    added_file = os.path.join(root, "less_important_file.1")

    collector = LogCollector()
    first_archive = collector.collect_logs_and_get_archive()
    self._assert_archive_created(first_archive)

    # First collection: all six files are expected in the archive.
    self._assert_files_are_in_archive([
        agent_log,
        rotated_log,
        os.path.join(root, "waagent.log.2.gz"),
        os.path.join(root, "waagent.log.3.gz"),
        os.path.join(root, "less_important_file"),
        os.path.join(root, "another_dir", "least_important_file"),
    ])

    file_count = self._get_number_of_files_in_archive()
    self.assertEqual(6, file_count, "Expected 6 files in archive, found {0}!".format(file_count))

    # Modify: rewrite waagent.log and move its modification time 5 seconds forward, so that the last
    # modified time and the last collection time cannot coincide in this test.
    self._create_file_of_specific_size(agent_log, LARGE_FILE_SIZE)
    bumped_mtime = os.path.getmtime(agent_log) + 5
    os.utime(agent_log, (bumped_mtime, bumped_mtime))

    # Add a file that the manifest covers, and delete one that was collected before.
    self._create_file_of_specific_size(added_file, LARGE_FILE_SIZE)
    rm_files(rotated_log)

    second_archive = collector.collect_logs_and_get_archive()
    self._assert_archive_created(second_archive)

    # Second collection: the new file is in, the deleted one is out.
    self._assert_files_are_in_archive([
        agent_log,
        os.path.join(root, "waagent.log.2.gz"),
        os.path.join(root, "waagent.log.3.gz"),
        os.path.join(root, "less_important_file"),
        added_file,
        os.path.join(root, "another_dir", "least_important_file"),
    ])
    self._assert_files_are_not_in_archive([rotated_log])

    # The archived copy of waagent.log must have the size of the rewritten file.
    size_in_archive = self._get_uncompressed_file_size(agent_log)
    self.assertEqual(
        LARGE_FILE_SIZE, size_in_archive,
        "File {0} hasn't been updated! Size in archive is {1}, but "
        "should be {2}.".format(agent_log, size_in_archive, LARGE_FILE_SIZE))

    file_count = self._get_number_of_files_in_archive()
    self.assertEqual(6, file_count, "Expected 6 files in archive, found {0}!".format(file_count))
def test_log_collector_should_prioritize_important_files_if_archive_too_big(self):
    # With the uncompressed archive size capped so that not every file fits, files are expected to be
    # added in priority order.
    root = self.root_collect_dir
    size_limit_target = "azurelinuxagent.common.logcollector._UNCOMPRESSED_ARCHIVE_SIZE_LIMIT"
    priority_target = "azurelinuxagent.common.logcollector._MUST_COLLECT_FILES"

    # Priority patterns, highest priority first.
    must_collect_files = [
        os.path.join(root, "waagent*"),
        os.path.join(root, "less_important_file*"),
    ]

    with patch(size_limit_target, 10 * 1024 * 1024):
        with patch(priority_target, must_collect_files):
            log_collector = LogCollector()
            archive = log_collector.collect_logs_and_get_archive()

    self._assert_archive_created(archive)

    # First collection: only the three highest-priority agent logs are expected.
    collected = [
        os.path.join(root, "waagent.log"),
        os.path.join(root, "waagent.log.1"),
        os.path.join(root, "waagent.log.2.gz"),
    ]
    left_out = [
        os.path.join(root, "waagent.log.3.gz"),
        os.path.join(root, "less_important_file"),
        os.path.join(root, "another_dir", "least_important_file"),
    ]
    self._assert_files_are_in_archive(collected)
    self._assert_files_are_not_in_archive(left_out)

    file_count = self._get_number_of_files_in_archive()
    self.assertEqual(3, file_count, "Expected 3 files in archive, found {0}!".format(file_count))

    # Second collection, reusing the same collector: after waagent.log.3.gz is deleted from disk, the
    # next files on the priority list are expected to be added if there is enough space.
    rm_files(os.path.join(root, "waagent.log.3.gz"))

    with patch(size_limit_target, 10 * 1024 * 1024):
        with patch(priority_target, must_collect_files):
            second_archive = log_collector.collect_logs_and_get_archive()

    collected = [
        os.path.join(root, "waagent.log"),
        os.path.join(root, "waagent.log.1"),
        os.path.join(root, "waagent.log.2.gz"),
        os.path.join(root, "less_important_file"),
        os.path.join(root, "another_dir", "least_important_file"),
    ]
    left_out = [os.path.join(root, "waagent.log.3.gz")]
    self._assert_files_are_in_archive(collected)
    self._assert_files_are_not_in_archive(left_out)
    self._assert_archive_created(second_archive)

    file_count = self._get_number_of_files_in_archive()
    self.assertEqual(5, file_count, "Expected 5 files in archive, found {0}!".format(file_count))