def test_archive01(self):
    """
    StateArchiver should archive all history directories by

      1. Creating a .zip of a timestamped directory's files
      2. Saving the .zip to /var/lib/waagent/history/
      3. Deleting the timestamped directory
    """
    expected_files = [
        'Prod.0.manifest.xml',
        'Prod.0.agentsManifest',
        'Microsoft.Azure.Extensions.CustomScript.0.xml'
    ]
    for name in expected_files:
        self._write_file(name)

    # Flush the files written above, then archive the result.
    StateFlusher(self.tmp_dir).flush(datetime.utcnow())
    StateArchiver(self.tmp_dir).archive()

    # Exactly one entry is expected in the history directory.
    history_entries = os.listdir(self.history_dir)
    self.assertEqual(1, len(history_entries))

    archive_name = history_entries[0]  # 2000-01-01T00:00:00.000000.zip
    stem, _ = os.path.splitext(archive_name)  # 2000-01-01T00:00:00.000000
    self.assertIsIso8601(stem)

    # The name of the archive must be a timestamp close to "now".
    archived_at = self.parse_isoformat(stem)
    self.assertDateTimeCloseTo(archived_at, datetime.utcnow(), timedelta(seconds=30))

    self.assertZipContains(os.path.join(self.history_dir, archive_name), expected_files)
def test_archive03(self):
    """
    All archives should be purged, both with the new naming (with
    incarnation number) and with the old naming.

    The limit _MAX_ARCHIVED_STATES is patched to 0 for the purge, so no
    entry is expected to remain afterwards.
    """
    base = datetime.now()
    old_stamp = (base + timedelta(seconds=5)).isoformat()
    new_stamp = (base + timedelta(seconds=10)).isoformat()

    # One directory and one .zip for each of the two naming schemes.
    entries = [
        os.path.join("history", old_stamp, "Prod.0.manifest.xml"),
        os.path.join("history", "{0}_incarnation_1".format(new_stamp), "Prod.1.manifest.xml"),
        os.path.join("history", "{0}.zip".format(old_stamp)),
        os.path.join("history", "{0}_incarnation_1.zip".format(new_stamp))
    ]
    for entry in entries:
        self._write_file(entry)

    self.assertEqual(4, len(os.listdir(self.history_dir)), "Not all entries were archived!")

    test_subject = StateArchiver(self.tmp_dir)
    with patch("azurelinuxagent.common.utils.archive._MAX_ARCHIVED_STATES", 0):
        test_subject.purge()

        remaining = os.listdir(self.history_dir)
        self.assertEqual(0, len(remaining), "Not all entries were purged!")
def test_archive04(self):
    """
    The archive directory is created if it does not exist.

    This failure was caught when .purge() was called before .archive().
    The test passes as long as purge() does not raise.
    """
    missing_lib_dir = os.path.join(self.tmp_dir, 'does-not-exist')
    StateArchiver(missing_lib_dir).purge()
def __init__(self):
    """
    Create the collaborators used by the handler and reset its state.
    """
    # Collaborators (the relative order of the factory calls is unchanged)
    self.osutil = get_osutil()
    self.dhcp_handler = get_dhcp_handler()
    self.protocol_util = get_protocol_util()
    self.archiver = StateArchiver(conf.get_lib_dir())

    # Initial state: stopped, nothing observed yet
    self.stopped = self.dhcp_warning_enabled = True
    self.hostname = self.server_thread = self.last_archive = None
    self.dhcp_id_list = []
class CleanupGoalStateHistory(PeriodicOperation):
    """
    Periodic operation that purges the goal state history and zips what
    has been preserved.
    """

    def __init__(self):
        # The period of the operation comes from the agent's configuration.
        cleanup_period = conf.get_goal_state_history_cleanup_period()
        super(CleanupGoalStateHistory, self).__init__(cleanup_period)
        self.archiver = StateArchiver(conf.get_lib_dir())

    def _operation(self):
        """
        Purge history and create a .zip of the history that has been preserved.
        """
        archiver = self.archiver
        archiver.purge()
        archiver.archive()
def test_archive02(self):
    """
    StateArchiver.purge() should leave only _MAX_ARCHIVED_STATES history
    entries on disk. The oldest timestamps are purged first.

    This test case creates a mixture of archive files (.zip) and
    directories. It creates 6 more entries than _MAX_ARCHIVED_STATES to
    ensure that 6 entries are cleaned up, then asserts that exactly the
    most recent _MAX_ARCHIVED_STATES entries are still present.
    """
    count = 6
    total = _MAX_ARCHIVED_STATES + count

    start = datetime.now()
    # Names of the entries in creation order (oldest first). They are
    # recorded here so the verification below does not need to re-derive
    # whether a given timestamp was created as a directory or as a .zip.
    entry_names = []

    for i in range(0, total):
        timestamp = (start + timedelta(seconds=i)).isoformat()
        if i % 2 == 0:
            # even index: a history directory containing one file
            name = "{0}_incarnation_0".format(timestamp)
            filename = os.path.join('history', name, 'Prod.0.manifest.xml')
        else:
            # odd index: an already-zipped archive
            name = "{0}_incarnation_0.zip".format(timestamp)
            filename = os.path.join('history', name)
        entry_names.append(name)
        self._write_file(filename)

    self.assertEqual(total, len(os.listdir(self.history_dir)))

    test_subject = StateArchiver(self.tmp_dir)
    test_subject.purge()

    archived_entries = os.listdir(self.history_dir)
    self.assertEqual(_MAX_ARCHIVED_STATES, len(archived_entries))

    # The first `count` (oldest) entries must be gone; every later one must remain.
    for name in entry_names[count:]:
        self.assertTrue(
            name in archived_entries,
            "'{0}' is not in the list of unpurged entires".format(name))
def __init__(self):
    """
    Create the collaborators used by the handler and reset its state.
    """
    # Collaborators (the relative order of the factory calls is unchanged)
    self.osutil = get_osutil()
    self.dhcp_handler = get_dhcp_handler()
    self.protocol_util = get_protocol_util()
    self.archiver = StateArchiver(conf.get_lib_dir())

    # Initial state: stopped, nothing observed yet
    self.stopped = self.dhcp_warning_enabled = True
    self.hostname = self.dhcp_id = self.server_thread = self.last_archive = None
def test_archive01(self):
    """
    StateArchiver should archive all history directories by

      1. Creating a .zip of a timestamped directory's files
      2. Saving the .zip to /var/lib/waagent/history/
      3. Deleting the timestamped directory
    """
    expected_files = [
        'GoalState.0.xml',
        'Prod.0.manifest.xml',
        'Prod.0.agentsManifest',
        'Microsoft.Azure.Extensions.CustomScript.0.xml'
    ]
    for name in expected_files:
        self._write_file(name)

    # Flush the files written above, then archive the result.
    StateFlusher(self.tmp_dir).flush()
    StateArchiver(self.tmp_dir).archive()

    # Exactly one entry is expected in the history directory.
    history_entries = os.listdir(self.history_dir)
    self.assertEqual(1, len(history_entries))

    archive_name = history_entries[0]  # 2000-01-01T00:00:00.000000_incarnation_N.zip
    timestamp_str, incarnation = self._parse_archive_name(archive_name)

    # The timestamp part of the name must be ISO 8601 and close to "now".
    self.assert_is_iso8601(timestamp_str)
    self.assert_datetime_close_to(
        self.parse_isoformat(timestamp_str), datetime.utcnow(), timedelta(seconds=30))
    self.assertEqual("0", incarnation)

    archive_path = os.path.join(self.history_dir, archive_name)
    contains_result = self.assert_zip_contains(archive_path, expected_files)
    self.assertEqual(contains_result, None)
def __init__(self):
    """
    Create the collaborators used by the handler, reset its state and build
    the list of periodic operations it runs.
    """
    self.osutil = get_osutil()
    self.dhcp_handler = get_dhcp_handler()
    # Neither the protocol util nor the protocol is created here.
    self.protocol_util = self._protocol = None
    self.stopped = self.dhcp_warning_enabled = True
    self.hostname = self.server_thread = None
    self.dhcp_id_list = []
    self.archiver = StateArchiver(conf.get_lib_dir())
    self._reset_firewall_rules = False

    # Operations that are always scheduled
    operations = [
        PeriodicOperation(
            "_remove_persistent_net_rules",
            self._remove_persistent_net_rules_period,
            conf.get_remove_persistent_net_rules_period()),
        PeriodicOperation(
            "_monitor_dhcp_client_restart",
            self._monitor_dhcp_client_restart,
            conf.get_monitor_dhcp_client_restart_period()),
        PeriodicOperation(
            "_cleanup_goal_state_history",
            self._cleanup_goal_state_history,
            conf.get_goal_state_history_cleanup_period())
    ]

    # Operations that depend on the configuration
    if conf.enable_firewall():
        firewall_op = PeriodicOperation(
            "_enable_firewall", self._enable_firewall, conf.get_enable_firewall_period())
        operations.append(firewall_op)

    if conf.get_root_device_scsi_timeout() is not None:
        scsi_op = PeriodicOperation(
            "_set_root_device_scsi_timeout",
            self._set_root_device_scsi_timeout,
            conf.get_root_device_scsi_timeout_period())
        operations.append(scsi_op)

    if conf.get_monitor_hostname():
        hostname_op = PeriodicOperation(
            "_monitor_hostname", self._monitor_hostname_changes, conf.get_monitor_hostname_period())
        operations.append(hostname_op)

    self._periodic_operations = operations
class EnvHandler(object):
    """
    Monitor changes to dhcp and hostname.
    If dhcp client process re-start has occurred, reset routes, dhcp with fabric.

    Monitor scsi disk.
    If new scsi disk found, set timeout
    """

    def __init__(self):
        self.osutil = get_osutil()
        self.dhcp_handler = get_dhcp_handler()
        self.protocol_util = get_protocol_util()
        self.stopped = True
        self.hostname = None
        self.dhcp_id_list = []
        self.server_thread = None
        self.dhcp_warning_enabled = True
        # UTC time of the last completed purge/archive pass (None until the first one)
        self.last_archive = None
        self.archiver = StateArchiver(conf.get_lib_dir())

    def run(self):
        """
        Start the monitor, stopping a running instance first. Configures the
        routes, records the current hostname and dhcp client pids and then
        starts the monitor thread.
        """
        if not self.stopped:
            logger.info("Stop existing env monitor service.")
            self.stop()

        self.stopped = False
        logger.info("Start env monitor service.")
        self.dhcp_handler.conf_routes()
        self.hostname = self.osutil.get_hostname_record()
        self.dhcp_id_list = self.get_dhcp_client_pid()
        self.start()

    def is_alive(self):
        """
        Return True if the monitor thread is running. Returns False (rather
        than raising) when start() has not created the thread yet.
        """
        return self.server_thread is not None and self.server_thread.is_alive()

    def start(self):
        """
        Create and start the daemon thread, named "EnvHandler", that runs monitor().
        """
        self.server_thread = threading.Thread(target=self.monitor)
        self.server_thread.setDaemon(True)
        self.server_thread.setName("EnvHandler")
        self.server_thread.start()

    def monitor(self):
        """
        Monitor firewall rules
        Monitor dhcp client pid and hostname.
        If dhcp client process re-start has occurred, reset routes.
        Purge unnecessary files from disk cache.

        Loops, sleeping 5 seconds between passes, until self.stopped is set.
        """
        protocol = self.protocol_util.get_protocol()
        reset_firewall_rules = False
        while not self.stopped:
            self.osutil.remove_rules_files()

            if conf.enable_firewall():
                # If the rules ever change we must reset all rules and start over again.
                #
                # There was a rule change at 2.2.26, which started dropping non-root traffic
                # to WireServer.  The previous rules allowed traffic.  Having both rules in
                # place negated the fix in 2.2.26.
                if not reset_firewall_rules:
                    self.osutil.remove_firewall(dst_ip=protocol.endpoint, uid=os.getuid())
                    reset_firewall_rules = True

                success = self.osutil.enable_firewall(dst_ip=protocol.endpoint, uid=os.getuid())

                add_periodic(
                    logger.EVERY_HOUR,
                    AGENT_NAME,
                    version=CURRENT_VERSION,
                    op=WALAEventOperation.Firewall,
                    is_success=success,
                    log_event=False)

            timeout = conf.get_root_device_scsi_timeout()
            if timeout is not None:
                self.osutil.set_scsi_disks_timeout(timeout)

            if conf.get_monitor_hostname():
                self.handle_hostname_update()

            self.handle_dhclient_restart()

            self.archive_history()

            time.sleep(5)

    def handle_hostname_update(self):
        """
        If the hostname reported by the socket module differs from the
        recorded one, set and publish the new hostname and record it.
        """
        curr_hostname = socket.gethostname()
        if curr_hostname != self.hostname:
            logger.info("EnvMonitor: Detected hostname change: {0} -> {1}",
                        self.hostname,
                        curr_hostname)
            self.osutil.set_hostname(curr_hostname)
            self.osutil.publish_hostname(curr_hostname)
            self.hostname = curr_hostname

    def get_dhcp_client_pid(self):
        """
        Return the sorted list of dhcp client pids, or an empty list if none
        is found or the lookup fails. A warning/error is logged only while
        self.dhcp_warning_enabled is set; the flag is cleared when no pid
        is found so the message is not repeated.
        """
        pid = []

        try:
            # return a sorted list since handle_dhclient_restart needs to compare the previous value with
            # the new value and the comparison should not be affected by the order of the items in the list
            pid = sorted(self.osutil.get_dhcp_pid())

            if len(pid) == 0 and self.dhcp_warning_enabled:
                logger.warn("Dhcp client is not running.")
        except Exception as exception:
            if self.dhcp_warning_enabled:
                logger.error("Failed to get the PID of the DHCP client: {0}", ustr(exception))

        self.dhcp_warning_enabled = len(pid) != 0

        return pid

    def handle_dhclient_restart(self):
        """
        Restore the routing table if the set of dhcp client pids changed
        since the last check.
        """
        if len(self.dhcp_id_list) == 0:
            self.dhcp_id_list = self.get_dhcp_client_pid()
            return

        # all the recorded dhcp processes are still alive: nothing to do
        if all(self.osutil.check_pid_alive(pid) for pid in self.dhcp_id_list):
            return

        new_pid = self.get_dhcp_client_pid()
        if len(new_pid) != 0 and new_pid != self.dhcp_id_list:
            logger.info("EnvMonitor: Detected dhcp client restart. Restoring routing table.")
            self.dhcp_handler.conf_routes()
            self.dhcp_id_list = new_pid

    def archive_history(self):
        """
        Purge history if we have exceed the maximum count.
        Create a .zip of the history that has been preserved.

        The work is skipped while less than ARCHIVE_INTERVAL has elapsed
        since the last completed pass.
        """
        if self.last_archive is not None \
                and datetime.datetime.utcnow() < \
                self.last_archive + ARCHIVE_INTERVAL:
            return

        self.archiver.purge()
        self.archiver.archive()

        # Record the completion time; without this the interval check above
        # never takes effect and the archiver runs on every monitor pass.
        self.last_archive = datetime.datetime.utcnow()

    def stop(self):
        """
        Stop server communication and join the thread to main thread.
        """
        self.stopped = True
        if self.server_thread is not None:
            self.server_thread.join()
class EnvHandler(object):
    """
    Monitor changes to dhcp and hostname.
    If dhcp client process re-start has occurred, reset routes, dhcp with fabric.

    Monitor scsi disk.
    If new scsi disk found, set timeout
    """

    def __init__(self):
        self.osutil = get_osutil()
        self.dhcp_handler = get_dhcp_handler()
        self.protocol_util = get_protocol_util()
        self.stopped = True
        self.hostname = None
        self.dhcp_id = None
        self.server_thread = None
        self.dhcp_warning_enabled = True
        # UTC time of the last completed purge/archive pass (None until the first one)
        self.last_archive = None
        self.archiver = StateArchiver(conf.get_lib_dir())

    def run(self):
        """
        Start the monitor, stopping a running instance first. Configures the
        routes, records the current hostname and dhcp client pid and then
        starts the monitor thread.
        """
        if not self.stopped:
            logger.info("Stop existing env monitor service.")
            self.stop()

        self.stopped = False
        logger.info("Start env monitor service.")
        self.dhcp_handler.conf_routes()
        self.hostname = self.osutil.get_hostname_record()
        self.dhcp_id = self.osutil.get_dhcp_pid()
        self.start()

    def is_alive(self):
        """
        Return True if the monitor thread is running. Returns False (rather
        than raising) when start() has not created the thread yet.
        """
        return self.server_thread is not None and self.server_thread.is_alive()

    def start(self):
        """
        Create and start the daemon thread that runs monitor().
        """
        self.server_thread = threading.Thread(target=self.monitor)
        self.server_thread.setDaemon(True)
        self.server_thread.start()

    def monitor(self):
        """
        Monitor firewall rules
        Monitor dhcp client pid and hostname.
        If dhcp client process re-start has occurred, reset routes.
        Purge unnecessary files from disk cache.

        Loops, sleeping 5 seconds between passes, until self.stopped is set.
        """
        protocol = self.protocol_util.get_protocol()
        reset_firewall_rules = False
        while not self.stopped:
            self.osutil.remove_rules_files()

            if conf.enable_firewall():
                # If the rules ever change we must reset all rules and start over again.
                #
                # There was a rule change at 2.2.26, which started dropping non-root traffic
                # to WireServer.  The previous rules allowed traffic.  Having both rules in
                # place negated the fix in 2.2.26.
                if not reset_firewall_rules:
                    self.osutil.remove_firewall(dst_ip=protocol.endpoint, uid=os.getuid())
                    reset_firewall_rules = True

                success = self.osutil.enable_firewall(
                    dst_ip=protocol.endpoint,
                    uid=os.getuid())
                add_periodic(
                    logger.EVERY_HOUR,
                    AGENT_NAME,
                    version=CURRENT_VERSION,
                    op=WALAEventOperation.Firewall,
                    is_success=success,
                    log_event=False)

            timeout = conf.get_root_device_scsi_timeout()
            if timeout is not None:
                self.osutil.set_scsi_disks_timeout(timeout)

            if conf.get_monitor_hostname():
                self.handle_hostname_update()

            self.handle_dhclient_restart()

            self.archive_history()

            time.sleep(5)

    def handle_hostname_update(self):
        """
        If the hostname reported by the socket module differs from the
        recorded one, set and publish the new hostname and record it.
        """
        curr_hostname = socket.gethostname()
        if curr_hostname != self.hostname:
            logger.info("EnvMonitor: Detected hostname change: {0} -> {1}",
                        self.hostname,
                        curr_hostname)
            self.osutil.set_hostname(curr_hostname)
            self.osutil.publish_hostname(curr_hostname)
            self.hostname = curr_hostname

    def handle_dhclient_restart(self):
        """
        Restore the routing table if the dhcp client pid changed since the
        last check.
        """
        if self.dhcp_id is None:
            if self.dhcp_warning_enabled:
                logger.warn("Dhcp client is not running. ")
            self.dhcp_id = self.osutil.get_dhcp_pid()
            # disable subsequent error logging
            self.dhcp_warning_enabled = self.dhcp_id is not None
            return

        # the dhcp process has not changed since the last check
        if self.osutil.check_pid_alive(self.dhcp_id.strip()):
            return

        new_pid = self.osutil.get_dhcp_pid()
        if new_pid is not None and new_pid != self.dhcp_id:
            logger.info("EnvMonitor: Detected dhcp client restart. "
                        "Restoring routing table.")
            self.dhcp_handler.conf_routes()
            self.dhcp_id = new_pid

    def archive_history(self):
        """
        Purge history if we have exceed the maximum count.
        Create a .zip of the history that has been preserved.

        The work is skipped while less than ARCHIVE_INTERVAL has elapsed
        since the last completed pass.
        """
        if self.last_archive is not None \
                and datetime.datetime.utcnow() < \
                self.last_archive + ARCHIVE_INTERVAL:
            return

        self.archiver.purge()
        self.archiver.archive()

        # Record the completion time; without this the interval check above
        # never takes effect and the archiver runs on every monitor pass.
        self.last_archive = datetime.datetime.utcnow()

    def stop(self):
        """
        Stop server communication and join the thread to main thread.
        """
        self.stopped = True
        if self.server_thread is not None:
            self.server_thread.join()
class EnvHandler(object):
    """
    Monitor changes to dhcp and hostname.
    If dhcp client process re-start has occurred, reset routes, dhcp with fabric.

    Monitor scsi disk.
    If new scsi disk found, set timeout
    """

    def __init__(self):
        self.osutil = get_osutil()
        self.dhcp_handler = get_dhcp_handler()
        self.protocol_util = get_protocol_util()
        self.stopped = True
        self.hostname = None
        self.dhcp_id = None
        self.server_thread = None
        self.dhcp_warning_enabled = True
        # UTC time of the last completed purge/archive pass (None until the first one)
        self.last_archive = None
        self.archiver = StateArchiver(conf.get_lib_dir())

    def run(self):
        """
        Start the monitor, stopping a running instance first. Configures the
        routes, records the current hostname and dhcp client pid and then
        starts the monitor thread.
        """
        if not self.stopped:
            logger.info("Stop existing env monitor service.")
            self.stop()

        self.stopped = False
        logger.info("Start env monitor service.")
        self.dhcp_handler.conf_routes()
        self.hostname = self.osutil.get_hostname_record()
        self.dhcp_id = self.osutil.get_dhcp_pid()
        self.start()

    def is_alive(self):
        """
        Return True if the monitor thread is running. Returns False (rather
        than raising) when start() has not created the thread yet.
        """
        return self.server_thread is not None and self.server_thread.is_alive()

    def start(self):
        """
        Create and start the daemon thread that runs monitor().
        """
        self.server_thread = threading.Thread(target=self.monitor)
        self.server_thread.setDaemon(True)
        self.server_thread.start()

    def monitor(self):
        """
        Monitor firewall rules
        Monitor dhcp client pid and hostname.
        If dhcp client process re-start has occurred, reset routes.
        Purge unnecessary files from disk cache.

        Loops, sleeping 5 seconds between passes, until self.stopped is set.
        """
        protocol = self.protocol_util.get_protocol()
        while not self.stopped:
            self.osutil.remove_rules_files()

            if conf.enable_firewall():
                success = self.osutil.enable_firewall(
                    dst_ip=protocol.endpoint,
                    uid=os.getuid())
                add_periodic(
                    logger.EVERY_HOUR,
                    AGENT_NAME,
                    version=CURRENT_VERSION,
                    op=WALAEventOperation.Firewall,
                    is_success=success,
                    log_event=False)

            timeout = conf.get_root_device_scsi_timeout()
            if timeout is not None:
                self.osutil.set_scsi_disks_timeout(timeout)

            if conf.get_monitor_hostname():
                self.handle_hostname_update()

            self.handle_dhclient_restart()

            self.archive_history()

            time.sleep(5)

    def handle_hostname_update(self):
        """
        If the hostname reported by the socket module differs from the
        recorded one, set and publish the new hostname and record it.
        """
        curr_hostname = socket.gethostname()
        if curr_hostname != self.hostname:
            logger.info("EnvMonitor: Detected hostname change: {0} -> {1}",
                        self.hostname,
                        curr_hostname)
            self.osutil.set_hostname(curr_hostname)
            self.osutil.publish_hostname(curr_hostname)
            self.hostname = curr_hostname

    def handle_dhclient_restart(self):
        """
        Restore the routing table if the dhcp client pid changed since the
        last check.
        """
        if self.dhcp_id is None:
            if self.dhcp_warning_enabled:
                logger.warn("Dhcp client is not running. ")
            self.dhcp_id = self.osutil.get_dhcp_pid()
            # disable subsequent error logging
            self.dhcp_warning_enabled = self.dhcp_id is not None
            return

        # the dhcp process has not changed since the last check
        if self.osutil.check_pid_alive(self.dhcp_id.strip()):
            return

        new_pid = self.osutil.get_dhcp_pid()
        if new_pid is not None and new_pid != self.dhcp_id:
            logger.info("EnvMonitor: Detected dhcp client restart. "
                        "Restoring routing table.")
            self.dhcp_handler.conf_routes()
            self.dhcp_id = new_pid

    def archive_history(self):
        """
        Purge history if we have exceed the maximum count.
        Create a .zip of the history that has been preserved.

        The work is skipped while less than ARCHIVE_INTERVAL has elapsed
        since the last completed pass.
        """
        if self.last_archive is not None \
                and datetime.datetime.utcnow() < \
                self.last_archive + ARCHIVE_INTERVAL:
            return

        self.archiver.purge()
        self.archiver.archive()

        # Record the completion time; without this the interval check above
        # never takes effect and the archiver runs on every monitor pass.
        self.last_archive = datetime.datetime.utcnow()

    def stop(self):
        """
        Stop server communication and join the thread to main thread.
        """
        self.stopped = True
        if self.server_thread is not None:
            self.server_thread.join()
def __init__(self):
    """
    Initialize the periodic operation with the configured cleanup period
    and create the archiver it uses.
    """
    cleanup_period = conf.get_goal_state_history_cleanup_period()
    super(CleanupGoalStateHistory, self).__init__(cleanup_period)
    self.archiver = StateArchiver(conf.get_lib_dir())
class EnvHandler(object):
    """
    Monitor changes to dhcp and hostname.
    If dhcp client process re-start has occurred, reset routes, dhcp with fabric.

    Monitor scsi disk.
    If new scsi disk found, set timeout
    """

    def __init__(self):
        self.osutil = get_osutil()
        self.dhcp_handler = get_dhcp_handler()
        # protocol_util and _protocol are created by monitor(), inside the thread
        self.protocol_util = None
        self._protocol = None
        self.stopped = True
        self.hostname = None
        self.dhcp_id_list = []
        self.server_thread = None
        self.dhcp_warning_enabled = True
        self.archiver = StateArchiver(conf.get_lib_dir())
        self._reset_firewall_rules = False

        # operations that are always scheduled
        self._periodic_operations = [
            PeriodicOperation("_remove_persistent_net_rules", self._remove_persistent_net_rules_period, conf.get_remove_persistent_net_rules_period()),
            PeriodicOperation("_monitor_dhcp_client_restart", self._monitor_dhcp_client_restart, conf.get_monitor_dhcp_client_restart_period()),
            PeriodicOperation("_cleanup_goal_state_history", self._cleanup_goal_state_history, conf.get_goal_state_history_cleanup_period())
        ]
        # operations that depend on the configuration
        if conf.enable_firewall():
            self._periodic_operations.append(PeriodicOperation("_enable_firewall", self._enable_firewall, conf.get_enable_firewall_period()))
        if conf.get_root_device_scsi_timeout() is not None:
            self._periodic_operations.append(PeriodicOperation("_set_root_device_scsi_timeout", self._set_root_device_scsi_timeout, conf.get_root_device_scsi_timeout_period()))
        if conf.get_monitor_hostname():
            self._periodic_operations.append(PeriodicOperation("_monitor_hostname", self._monitor_hostname_changes, conf.get_monitor_hostname_period()))

    def run(self):
        """
        Start the monitor, stopping a running instance first. Configures the
        routes, records the current hostname and dhcp client pids and then
        starts the monitor thread.
        """
        if not self.stopped:
            logger.info("Stop existing env monitor service.")
            self.stop()

        self.stopped = False
        logger.info("Start env monitor service.")
        self.dhcp_handler.conf_routes()
        self.hostname = self.osutil.get_hostname_record()
        self.dhcp_id_list = self.get_dhcp_client_pid()
        self.start()

    def is_alive(self):
        """
        Return True if the monitor thread is running. Returns False (rather
        than raising) when start() has not created the thread yet.
        """
        return self.server_thread is not None and self.server_thread.is_alive()

    def start(self):
        """
        Create and start the daemon thread, named "EnvHandler", that runs monitor().
        """
        self.server_thread = threading.Thread(target=self.monitor)
        self.server_thread.setDaemon(True)
        self.server_thread.setName("EnvHandler")
        self.server_thread.start()

    def monitor(self):
        """
        Thread body: run the periodic operations until self.stopped is set.
        An error in one pass is logged and the pass is skipped; an error
        outside the loop is logged and ends the thread.
        """
        try:
            # The initialization of ProtocolUtil for the Environment thread should be done within the thread itself rather
            # than initializing it in the ExtHandler thread. This is done to avoid any concurrency issues as each
            # thread would now have its own ProtocolUtil object as per the SingletonPerThread model.
            self.protocol_util = get_protocol_util()
            self._protocol = self.protocol_util.get_protocol()
            while not self.stopped:
                try:
                    for op in self._periodic_operations:
                        op.run()
                except Exception as e:
                    logger.error("An error occurred in the environment thread main loop; will skip the current iteration.\n{0}", ustr(e))
                finally:
                    PeriodicOperation.sleep_until_next_operation(self._periodic_operations)
        except Exception as e:
            logger.error("An error occurred in the environment thread; will exit the thread.\n{0}", ustr(e))

    def _remove_persistent_net_rules_period(self):
        """Periodic operation: remove the rules files through osutil."""
        self.osutil.remove_rules_files()

    def _enable_firewall(self):
        """
        Periodic operation: enable the firewall for the protocol endpoint
        and report the result as a periodic event.
        """
        # If the rules ever change we must reset all rules and start over again.
        #
        # There was a rule change at 2.2.26, which started dropping non-root traffic
        # to WireServer.  The previous rules allowed traffic.  Having both rules in
        # place negated the fix in 2.2.26.
        if not self._reset_firewall_rules:
            self.osutil.remove_firewall(dst_ip=self._protocol.get_endpoint(), uid=os.getuid())
            self._reset_firewall_rules = True

        success = self.osutil.enable_firewall(dst_ip=self._protocol.get_endpoint(), uid=os.getuid())

        add_periodic(
            logger.EVERY_HOUR,
            AGENT_NAME,
            version=CURRENT_VERSION,
            op=WALAEventOperation.Firewall,
            is_success=success,
            log_event=False)

    def _set_root_device_scsi_timeout(self):
        """Periodic operation: apply the configured scsi timeout."""
        self.osutil.set_scsi_disks_timeout(conf.get_root_device_scsi_timeout())

    def _monitor_hostname_changes(self):
        """
        Periodic operation: if the hostname reported by the socket module
        differs from the recorded one, set and publish the new hostname
        and record it.
        """
        curr_hostname = socket.gethostname()
        if curr_hostname != self.hostname:
            logger.info("EnvMonitor: Detected hostname change: {0} -> {1}",
                        self.hostname,
                        curr_hostname)
            self.osutil.set_hostname(curr_hostname)
            self.osutil.publish_hostname(curr_hostname)
            self.hostname = curr_hostname

    def get_dhcp_client_pid(self):
        """
        Return the sorted list of dhcp client pids, or an empty list if none
        is found or the lookup fails. A warning/error is logged only while
        self.dhcp_warning_enabled is set; the flag is cleared when no pid
        is found so the message is not repeated.
        """
        pid = []

        try:
            # return a sorted list since handle_dhclient_restart needs to compare the previous value with
            # the new value and the comparison should not be affected by the order of the items in the list
            pid = sorted(self.osutil.get_dhcp_pid())

            if len(pid) == 0 and self.dhcp_warning_enabled:
                logger.warn("Dhcp client is not running.")
        except Exception as exception:
            if self.dhcp_warning_enabled:
                logger.error("Failed to get the PID of the DHCP client: {0}", ustr(exception))

        self.dhcp_warning_enabled = len(pid) != 0

        return pid

    def _monitor_dhcp_client_restart(self):
        """Periodic operation: delegate to handle_dhclient_restart()."""
        self.handle_dhclient_restart()

    def handle_dhclient_restart(self):
        """
        Restore the routing table if the set of dhcp client pids changed
        since the last check.
        """
        if len(self.dhcp_id_list) == 0:
            self.dhcp_id_list = self.get_dhcp_client_pid()
            return

        # all the recorded dhcp processes are still alive: nothing to do
        if all(self.osutil.check_pid_alive(pid) for pid in self.dhcp_id_list):
            return

        new_pid = self.get_dhcp_client_pid()
        if len(new_pid) != 0 and new_pid != self.dhcp_id_list:
            logger.info("EnvMonitor: Detected dhcp client restart. Restoring routing table.")
            self.dhcp_handler.conf_routes()
            self.dhcp_id_list = new_pid

    def _cleanup_goal_state_history(self):
        """
        Purge history and create a .zip of the history that has been preserved.
        """
        self.archiver.purge()
        self.archiver.archive()

    def stop(self):
        """
        Stop server communication and join the thread to main thread.
        """
        self.stopped = True
        if self.server_thread is not None:
            self.server_thread.join()