def blocking_shutdown():
    """Shut the server down, optionally pad the elapsed time, then optionally reboot.

    This is a closure: ``self``, ``simulate_shutdown`` and ``reboot`` are
    read from the enclosing scope.
    """
    log.info("%s beginning shutdown" % self.fqdn)
    self._enter_shutdown()

    # Measure how long stopping the agent and leaving the cluster takes.
    began_at = IMLDateTime.utcnow()
    self.shutdown_agent()
    self._cluster.leave(self.nodename)
    ended_at = IMLDateTime.utcnow()
    shutdown_time = (ended_at - began_at).seconds

    # When simulating, sleep away whatever remains of the minimum duration.
    while shutdown_time < MIN_SHUTDOWN_DURATION and simulate_shutdown:
        remaining_delay = MIN_SHUTDOWN_DURATION - shutdown_time
        log.info("%s, sleeping for %d seconds on shutdown" % (self.fqdn, remaining_delay))
        shutdown_time += remaining_delay
        time.sleep(remaining_delay)

    self._exit_shutdown()
    log.info("%s shutdown complete" % self.fqdn)

    if not reboot:
        return

    log.info("%s rebooting" % self.fqdn)
    self.startup(simulate_shutdown)
def _parse_crm_as_xml(self, raw):
    """Convert raw crm_mon output into a status dictionary.

    :param raw: text returned by crm_mon
    :return: dict with "datetime", "nodes" and "options" keys, or None when
             ``raw`` cannot be parsed as XML
    """
    try:
        root = xml.fromstring(raw)
    except ParseError:
        # Not XML. Only complain when it is not the known connection-failure text.
        if CorosyncPlugin.COROSYNC_CONNECTION_FAILURE not in raw:
            daemon_log.warning("Bad xml from corosync crm_mon: %s" % raw)
        return None

    # Timestamp of the report, re-expressed as a UTC ISO-style string.
    last_update = root.find("summary/last_update").get("time")
    parsed_time = IMLDateTime.strptime(last_update, "%a %b %d %H:%M:%S %Y")
    utc_stamp = IMLDateTime.convert_datetime_to_utc(parsed_time).strftime(
        "%Y-%m-%dT%H:%M:%S+00:00"
    )

    # One entry per <node>, keyed by its name attribute.
    nodes = dict(
        (node.get("name"), node.attrib) for node in root.findall("nodes/node")
    )

    # stonith is reported as disabled unless the cluster options say otherwise.
    options = {"stonith_enabled": False}
    cluster_options = root.find("summary/cluster_options")
    if cluster_options is not None:
        options["stonith_enabled"] = (
            cluster_options.get("stonith-enabled") == "true"
        )

    return {"datetime": utc_stamp, "nodes": nodes, "options": options}
def setUp(self):
    """Make sure a ManagedHost for CLIENT_NAME exists, then run the parent setUp."""
    profile_name = "TestAgentRpcProfile"

    if ManagedHost.objects.filter(fqdn=self.CLIENT_NAME).count():
        # The host is already in the database: reuse it.
        self.host = ManagedHost.objects.get(fqdn=self.CLIENT_NAME)
    else:
        # Reuse the test profile when present, otherwise create it.
        if ServerProfile.objects.filter(name=profile_name).count():
            server_profile = ServerProfile.objects.get(name=profile_name)
        else:
            server_profile = ServerProfile.objects.create(
                name=profile_name,
                ui_name="Profile created to TestAgentRpc can work",
                managed=True,
                worker=False,
                ntp=True,
                corosync=True,
                corosync2=False,
            )

        self.host = ManagedHost.objects.create(
            fqdn=self.CLIENT_NAME,
            nodename=self.CLIENT_NAME,
            address=self.CLIENT_NAME,
            state="lnet_down",
            state_modified_at=IMLDateTime.utcnow(),
            server_profile=server_profile,
        )
        LNetConfiguration.objects.create(host=self.host, state="lnet_down")
        ClientCertificate.objects.create(host=self.host, serial=self.CLIENT_CERT_SERIAL)

    super(TestAgentRpc, self).setUp()
def poll(self):
    """Return the health flag, first marking the host unhealthy if contact timed out.

    :return: the value of ``self._healthy`` after the check
    """
    if not self._healthy:
        return self._healthy

    # Healthy so far: compare the silence since last contact with the timeout.
    silence = IMLDateTime.utcnow() - self.last_contact
    allowed = datetime.timedelta(seconds=self.CONTACT_TIMEOUT)
    if silence > allowed:
        self.update_health(False)

    return self._healthy
def update(self, boot_time, client_start_time):
    """Record a contact and track changes in boot time and client start time.

    :param boot_time: boot time reported in this contact, or None
    :param client_start_time: client start time reported in this contact, or None
    :return: True when a previously known client start time has changed
             (the agent should be sent a SESSION_TERMINATE_ALL), else False
    """
    self.last_contact = IMLDateTime.utcnow()

    boot_time_changed = boot_time is not None and boot_time != self._boot_time
    if boot_time_changed:
        # Only a change from a known boot time counts as a reboot.
        if self._boot_time is not None:
            HostRebootEvent.register_event(
                alert_item=self._host, boot_time=boot_time, severity=logging.WARNING
            )
            log.warning("Server %s rebooted at %s" % (self.fqdn, boot_time))
        self._boot_time = boot_time
        job_scheduler_notify.notify(self._host, self._boot_time, {'boot_time': boot_time})

    require_reset = False
    start_time_changed = (
        client_start_time is not None and client_start_time != self._client_start_time
    )
    if start_time_changed:
        # Only a change from a known start time counts as an agent restart.
        require_reset = self._client_start_time is not None
        if require_reset:
            log.warning("Agent restart on server %s at %s" % (self.fqdn, client_start_time))
        self._client_start_time = client_start_time

    if not self._healthy:
        self.update_health(True)

    return require_reset
def test_creation(self):
    """
    POST with only a profile is accepted, as is a profile plus expiry or
    credits; secret, cancelled and id are rejected.
    """
    profile_uri = self.profile['resource_uri']

    # Empty is OK
    response = self.api_client.post(self.RESOURCE_PATH, data={'profile': profile_uri})
    self.assertHttpCreated(response)

    # Two minutes from now, truncated to whole seconds.
    expiry_value = (
        IMLDateTime.utcnow() + datetime.timedelta(seconds=120)
    ).replace(microsecond=0)

    creation_allowed_values = {
        'expiry': expiry_value.isoformat(),
        'credits': 129
    }
    for attr, test_val in creation_allowed_values.items():
        payload = {'profile': profile_uri, attr: test_val}
        response = self.api_client.post(self.RESOURCE_PATH, data=payload)
        self.assertHttpCreated(response)
        created_obj = self.deserialize(response)
        self.assertEqual(created_obj[attr], test_val)

    # Anything else is not OK
    creation_forbidden_values = {
        'secret': "X" * SECRET_LENGTH * 2,
        'cancelled': True,
        'id': 0
    }
    for attribute, test_val in creation_forbidden_values.items():
        payload = {'profile': profile_uri, attribute: test_val}
        response = self.api_client.post(self.RESOURCE_PATH, data=payload)
        self.assertHttpBadRequest(response)
def test_update_properties(self):
    """update_properties accepts both None and a dict for a fresh synthetic host."""
    scan = UpdateScan()
    scan.host = synthetic_host('test1')
    scan.started_at = IMLDateTime.utcnow()

    # A freshly created synthetic host starts with empty properties.
    self.assertEqual(scan.host.properties, '{}')

    for properties in (None, {'key': 'value'}):
        scan.update_properties(properties)
def _send_package_data(self, host, data):
    """Feed ``data`` to UpdateScan.update_packages as the 'agent' package set for ``host``.

    The scan instance is kept on ``self.update_scan``.
    """
    # UpdateScan has to be instantiated and given a host by hand before the
    # function under test can be run.
    self.update_scan = UpdateScan()
    scan = self.update_scan
    scan.host = host
    scan.started_at = IMLDateTime.utcnow()

    packages = {'agent': data}
    scan.update_packages(packages)
def validate_token(key, credits=1):
    """
    Check that a token may authorize a setup/register operation.

    The token must exist, have credits left, not be expired and not be
    cancelled.

    :param key: secret identifying the registration token
    :param credits: number of credits to decrement if valid
    :return 2-tuple (<http response if error, else None>, <registration token if valid, else None>)
    """
    try:
        with transaction.commit_on_success():
            token = RegistrationToken.objects.get(secret=key)
            if not token.credits:
                log.warning("Attempt to register with exhausted token %s" % key)
                return HttpForbidden(), None

            # Decrement .credits
            # NOTE(review): this happens before the expiry/cancelled checks
            # below, so a rejected attempt still consumes credits - confirm
            # that is intended.
            remaining = token.credits - credits
            RegistrationToken.objects.filter(secret=key).update(credits=remaining)
    except RegistrationToken.DoesNotExist:
        log.warning("Attempt to register with non-existent token %s" % key)
        return HttpForbidden(), None

    now = IMLDateTime.utcnow()
    if token.expiry < now:
        log.warning("Attempt to register with expired token %s (now %s, expired at %s)" % (key, now, token.expiry))
        return HttpForbidden(), None

    if token.cancelled:
        log.warning("Attempt to register with cancelled token %s" % key)
        return HttpForbidden(), None

    return None, token
def _acquire_token(self, url, username, password, credit_count, duration=None, preferred_profile=None):
    """
    Use the REST API at ``url`` to create a server registration token.

    :param credit_count: value sent as the token's 'credits'
    :param duration: when not None, added to the current time to form 'expiry'
    :param preferred_profile: name of the server profile to use; the first
                              listed profile is used when this is not given
    :return: the 'secret' field of the created token
    :raises RuntimeError: when no profile is named ``preferred_profile``
    """
    session = self._get_authenticated_session(url, username, password)

    # Acquire a server profile
    response = session.get("%sapi/server_profile/" % url)
    if preferred_profile:
        matching = [
            candidate for candidate in response.json()['objects']
            if candidate['name'] == preferred_profile
        ]
        try:
            profile = matching[0]
        except IndexError:
            raise RuntimeError("No such profile: %s" % preferred_profile)
    else:
        profile = response.json()['objects'][0]

    args = {'credits': credit_count, 'profile': profile['resource_uri']}
    if duration is not None:
        expires_at = IMLDateTime.utcnow() + duration
        args['expiry'] = expires_at.isoformat()

    response = session.post("%sapi/registration_token/" % url, data=json.dumps(args))
    assert response.ok
    return response.json()['secret']
def on_message(self, message):
    """Forward a notification message to the job scheduler.

    Values in ``message["update_attrs"]`` whose model field is a
    DateTimeField are parsed back into datetimes with IMLDateTime.parse;
    all other values are passed through unchanged.

    :param message: dict with "instance_natural_key", "instance_id", "time",
                    "update_attrs" and "from_states" entries
    :return: None. Ordinary errors are logged and swallowed so one bad
             message cannot bring down the service.
    """
    try:
        # Deserialize any datetimes which were serialized for JSON
        model_klass = ContentType.objects.get_by_natural_key(*message["instance_natural_key"]).model_class()
        model_fields = model_klass._meta.fields

        deserialized_update_attrs = {}
        for attr, value in message["update_attrs"].items():
            # Attributes without a matching field (e.g. _id names) are not
            # datetimes, so they fall through with their value untouched.
            field = next((f for f in model_fields if f.name == attr), None)
            if isinstance(field, DateTimeField):
                value = IMLDateTime.parse(value)
            deserialized_update_attrs[attr] = value

        log.debug("on_message: %s %s" % (message, deserialized_update_attrs))

        self._job_scheduler.notify(
            message["instance_natural_key"],
            message["instance_id"],
            message["time"],
            deserialized_update_attrs,
            message["from_states"],
        )
    except Exception:
        # Log bad messages and continue, swallow the exception to avoid
        # bringing down the whole service.  Catching Exception rather than
        # using a bare except lets KeyboardInterrupt and SystemExit through.
        log.warning("on_message: bad message: %s" % traceback.format_exc())
def start_session(self):
    """Build the session-start test message listing each cluster node and its online flag."""
    # In production the peer list comes from shelling out to
    #   crm_mon --one-shot --as-xml
    # To simulate that, the _server object (a FakeServer) has to supply the
    # peers.  This implementation reads every node held in the simulated
    # cluster's state together with the 'online' flag stored for it.
    cluster = self._server._cluster
    log.debug('cluster nodes: %s' % cluster.state['nodes'])

    nodes = []
    for node_dict in cluster.state['nodes'].values():
        nodes.append((node_dict['nodename'], node_dict['online']))
    log.debug('Nodes and state: %s' % nodes)

    timestamp = IMLDateTime.utcnow().isoformat()
    message = self.get_test_message(utc_iso_date_str=timestamp, node_status_list=nodes)
    log.debug(message)
    return message
def _scan(self, initial=False):
    """Collect a full report: start time, agent version, capabilities, audit data, mounts and resource locations.

    :param initial: not used by this implementation
    :return: dict describing the current state
    """
    started_at = IMLDateTime.utcnow().isoformat()
    audit = local.LocalAudit()

    # Only set resource_locations if we have the management package
    try:
        from chroma_agent.action_plugins import manage_targets

        resource_locations = manage_targets.get_resource_locations()
    except ImportError:
        resource_locations = None

    mounts = self._scan_mounts()

    # FIXME: HYD-1095 we should be sending a delta instead of a full dump every time
    # FIXME: At this time the 'capabilities' attribute is unused on the manager
    report = {"started_at": started_at}
    report["agent_version"] = agent_version()
    report["capabilities"] = plugin_manager.ActionPluginManager().capabilities
    report["metrics"] = audit.metrics()
    report["properties"] = audit.properties()
    report["mounts"] = mounts
    report["resource_locations"] = resource_locations
    return report
def fake_log_message(message):
    """Create and return a LogMessage row for ``message`` stamped with the current UTC time.

    Severity and facility are 0 and the tag is empty; the message class is
    derived from the text with LogMessage.get_message_class.
    """
    fields = dict(
        datetime=IMLDateTime.utcnow(),
        message=message,
        severity=0,
        facility=0,
        tag="",
        message_class=LogMessage.get_message_class(message),
    )
    return LogMessage.objects.create(**fields)
def test_HYD648(self):
    """Datetimes returned by the host API must carry timezone information."""
    synthetic_host('myserver')

    response = self.api_client.get("/api/host/")
    self.assertHttpOK(response)

    first_host = self.deserialize(response)['objects'][0]
    modified_at = IMLDateTime.parse(first_host['state_modified_at'])
    self.assertNotEqual(modified_at.tzinfo, None)
def run(self, host_id, host_data):
    """Load the host, remember the supplied scan data and audit the host.

    :param host_id: primary key of the ManagedHost to audit
    :param host_data: dict of reported data; must contain "started_at"
    """
    managed_host = ManagedHost.objects.get(pk=host_id)
    started = IMLDateTime.parse(host_data["started_at"])

    self.started_at = started
    self.host = managed_host
    self.host_data = host_data

    log.debug("UpdateScan.run: %s" % self.host)
    self.audit_host()
def process_response(self, request, response):
    """Log a Bunyan-style JSON record of the request and response at debug level.

    Responses whose Content-Type contains none of the values in TYPES are
    returned without logging.

    :return: ``response``, unmodified
    """
    content_type = response['Content-Type']

    if not any(known in content_type for known in TYPES.values()):
        return response

    meta = request.META

    def get_meta(prop):
        return meta.get(prop, '')

    def try_loads(string, default):
        # Only JSON content is decoded; anything else (or invalid JSON)
        # yields the supplied default.
        if TYPES['JSON'] in content_type:
            try:
                return json.loads(string)
            except ValueError:
                pass
        return default

    request_headers = dict(
        (key, val) for key, val in meta.items() if key.isupper()
    )
    response_headers = dict(
        (key.upper().replace('-', '_'), val) for key, val in response.items()
    )

    request_data = {
        'status': response.status_code,
        'content_length': get_meta('CONTENT_LENGTH'),
        'user_agent': get_meta('HTTP_USER_AGENT').decode('utf-8', 'replace'),
        'body': try_loads(request.body, ''),
        'response': try_loads(response.content, response.content),
        'request_headers': request_headers,
        'response_headers': response_headers,
        # The following are required by Bunyan.
        'hostname': get_meta('HTTP_X_FORWARDED_HOST'),
        'name': 'Request Log',
        'time': IMLDateTime.utcnow().isoformat(),
        'v': 0,
        'pid': os.getpid(),
        'msg': 'Request made to {0} {1}'.format(request.method, request.get_full_path()),
        # Bunyan log level is python's log level + 10
        'level': settings.LOG_LEVEL + 10,
    }

    logger.debug(json.dumps(request_data))
    return response
def __init__(self, fqdn, boot_time, client_start_time):
    """Set up state for the host named ``fqdn``; it starts out not healthy.

    :param fqdn: used to look up the ManagedHost
    :param boot_time: initial boot time
    :param client_start_time: initial client start time
    """
    self.fqdn = fqdn
    self._boot_time = boot_time
    self._client_start_time = client_start_time

    # NOTE(review): both last_contact (None) and _last_contact (now) are set
    # here - confirm which of the two readers are expected to use.
    self.last_contact = None
    self._healthy = False
    self._host = ManagedHost.objects.get(fqdn=self.fqdn)
    self._last_contact = IMLDateTime.utcnow()
def inject_log_message(self, message):
    """Queue ``message`` as a 'cluster_sim' log entry stamped with the current UTC time."""
    log.debug("Injecting log message %s/%s" % (self.fqdn, message))

    entry = {
        'source': 'cluster_sim',
        'severity': 1,
        'facility': 1,
        'message': message,
        'datetime': IMLDateTime.utcnow().isoformat()
    }
    self._log_messages.append(entry)
def pretty_time(self, in_time):
    """Format ``in_time`` for display after converting it with LocalOffset.

    ``in_time`` is first tagged with FixedOffset(0).  Times earlier than
    today's midnight (as given by IMLDateTime.now()) include the date;
    later ones show only the time of day.
    """
    from iml_common.lib.date_time import IMLDateTime, FixedOffset, LocalOffset

    local_midnight = IMLDateTime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    out_time = in_time.replace(tzinfo=FixedOffset(0)).astimezone(LocalOffset)

    if out_time < local_midnight:
        layout = "%Y/%m/%d %H:%M:%S"
    else:
        layout = "%H:%M:%S"
    return out_time.strftime(layout)
class Meta:
    # Resource identity and the fields it exposes.
    resource_name = 'registration_token'
    object_class = RegistrationToken
    fields = [
        'id',
        'secret',
        'expiry',
        'credits',
        'cancelled',
        'profile',
        'register_command',
    ]

    # Only tokens that are not cancelled, not yet expired and still hold credits.
    # NOTE(review): IMLDateTime.utcnow() is evaluated once, when this class
    # body executes, so the expiry cut-off in this filter is fixed at that
    # moment - confirm that expiry is re-checked elsewhere.
    queryset = RegistrationToken.objects.filter(
        cancelled=False,
        expiry__gt=IMLDateTime.utcnow(),
        credits__gt=0,
    )

    # Access control, validation and serialization.
    authentication = AnonymousAuthentication()
    authorization = TokenAuthorization()
    validation = RegistrationTokenValidation()
    serializer = DateSerializer()

    # Allowed HTTP methods and response behaviour.
    list_allowed_methods = ['get', 'post']
    detail_allowed_methods = ['patch', 'get']
    always_return_data = True
def _scan(self, initial=False):
    """Collect a full report: start time, agent version, capabilities and audit data.

    :param initial: not used by this implementation
    :return: dict describing the current state
    """
    started_at = IMLDateTime.utcnow().isoformat()
    audit = local.LocalAudit()

    # FIXME: HYD-1095 we should be sending a delta instead of a full dump every time
    # FIXME: At this time the 'capabilities' attribute is unused on the manager
    report = {"started_at": started_at}
    report["agent_version"] = agent_version()
    report["capabilities"] = plugin_manager.ActionPluginManager().capabilities
    report["metrics"] = audit.metrics()
    report["properties"] = audit.properties()
    return report
def test_priorities(self):
    """
    Messages are consumed for POST by payload priority (data plane);
    messages without a payload (control plane) go at the highest priority.
    """
    client = mock.Mock()
    client._fqdn = "test_server"
    client.boot_time = IMLDateTime.utcnow()
    client.start_time = IMLDateTime.utcnow()

    writer = HttpWriter(client)

    def inject_messages(*args, **kwargs):
        # A control plane message
        writer.put(
            Message("SESSION_CREATE_REQUEST", "plugin_fuz", None, None, None))

        # Data plane messages, queued from lowest to highest priority.
        data_messages = [
            ("plugin_foo", DevicePluginMessage("low", PRIO_LOW)),
            ("plugin_bar", DevicePluginMessage("normal", PRIO_NORMAL)),
            ("plugin_baz", DevicePluginMessage("high", PRIO_HIGH)),
        ]
        for position, (plugin, body) in enumerate(data_messages):
            writer.put(Message("DATA", plugin, body, "foo", position))

    inject_messages()
    writer.send()
    self.assertEqual(client.post.call_count, 1)

    messages = client.post.call_args[0][0]["messages"]
    self.assertEqual(len(messages), 4)

    # The first two (equal priority) arrive in insertion order, the rest in
    # priority order.
    expected_plugins = ["plugin_fuz", "plugin_baz", "plugin_bar", "plugin_foo"]
    for position, plugin in enumerate(expected_plugins):
        self.assertEqual(messages[position]["plugin"], plugin)
def pop_log_messages(self):
    """Return the queued log messages plus ``log_rate`` generated ones, and empty the queue.

    :return: the list that was queued, extended with the generated entries
    """
    drained = self._log_messages
    stamp = datetime.datetime.now()

    generated = []
    for sequence in range(0, self.log_rate):
        generated.append({
            'source': 'cluster_sim',
            'severity': 1,
            'facility': 1,
            'message': "%s %s %s" % (self.fqdn, stamp, sequence),
            'datetime': IMLDateTime.utcnow().isoformat()
        })
    drained.extend(generated)

    # Start a fresh queue; the caller now owns the drained list.
    self._log_messages = []
    return drained
def start_session(self):
    """Return the session-start payload: a single 'cluster_sim' log line announcing the session."""
    text = 'Lustre: Cluster simulator systemd_journal session start %s %s' % (
        self._server.fqdn, datetime.datetime.now())

    # NOTE(review): 'Z' is appended to isoformat(); if utcnow() returns a
    # timezone-aware value the string would carry two UTC markers - confirm.
    line = {
        'source': 'cluster_sim',
        'severity': 1,
        'facility': 1,
        'message': text,
        'datetime': IMLDateTime.utcnow().isoformat() + 'Z'
    }
    return {'log_lines': [line]}
def test_fetch_not_dismissed_alerts_since_last_sample(self):
    """Filtering on begin, dismissed and severity returns exactly the two matching alerts."""
    severities = ['WARNING', 'ERROR']
    query = {
        "begin__gte": str(self.sample_date),
        "dismissed": 'false',
        "severity__in": severities,
    }

    response = self.api_client.get("/api/alert/", data=query)
    self.assertHttpOK(response)

    objects = self.deserialize(response)['objects']
    self.assertEqual(len(objects), 2, self.dump_objects(objects))

    # Every returned alert must satisfy each of the filters.
    for alert in objects:
        self.assertEqual(alert['dismissed'], False)
        self.assertTrue(alert['severity'] in ['WARNING', 'ERROR'])
        self.assertTrue(IMLDateTime.parse(alert['begin']) >= self.sample_date)
class Meta:
    # Resource identity and the fields it exposes.
    resource_name = "registration_token"
    object_class = RegistrationToken
    fields = ["id", "secret", "expiry", "credits", "cancelled", "profile", "register_command"]

    # Only tokens that are not cancelled, not yet expired and still hold credits.
    # NOTE(review): IMLDateTime.utcnow() is evaluated once, when this class
    # body executes, so the expiry cut-off in this filter is fixed at that
    # moment - confirm that expiry is re-checked elsewhere.
    queryset = RegistrationToken.objects.filter(cancelled=False, expiry__gt=IMLDateTime.utcnow(), credits__gt=0)

    # Access control, validation and serialization.
    authentication = AnonymousAuthentication()
    authorization = TokenAuthorization()
    validation = RegistrationTokenValidation()
    serializer = DateSerializer()

    # Allowed HTTP methods and response behaviour.
    list_allowed_methods = ["get", "post"]
    detail_allowed_methods = ["patch", "get"]
    always_return_data = True
def test_fetch_not_dismissed_alerts_since_last_sample(self):
    """Filtering on begin, dismissed and severity returns exactly the two matching alerts."""
    severities = ["WARNING", "ERROR"]
    query = {
        "begin__gte": str(self.sample_date),
        "dismissed": False,
        "severity__in": severities,
    }

    response = self.api_client.get("/api/alert/", data=query)
    self.assertHttpOK(response)

    objects = self.deserialize(response)["objects"]
    self.assertEqual(len(objects), 2, self.dump_objects(objects))

    # Every returned alert must satisfy each of the filters.
    for alert in objects:
        self.assertEqual(alert["dismissed"], False)
        self.assertTrue(alert["severity"] in ["WARNING", "ERROR"])
        self.assertTrue(IMLDateTime.parse(alert["begin"]) >= self.sample_date)
def __init__(self, url, action_plugins, device_plugins, server_properties, crypto):
    """Initialise the client: identity, plugin sets, HTTP reader/writer, session table and stop flag.

    :param url: passed to the parent constructor
    :param action_plugins: stored as ``self.action_plugins``
    :param device_plugins: stored as ``self.device_plugins``
    :param server_properties: supplies fqdn, nodename and boot_time
    :param crypto: passed to the parent constructor
    """
    super(AgentClient, self).__init__(url, crypto)

    # Identity and timing; start_time is the moment this client was created.
    props = server_properties
    self._fqdn = props.fqdn
    self._nodename = props.nodename
    self.boot_time = props.boot_time
    self.start_time = IMLDateTime.utcnow()

    self.action_plugins = action_plugins
    self.device_plugins = device_plugins

    # Helpers that are handed this client instance.
    self.writer = HttpWriter(self)
    self.reader = HttpReader(self)
    self.sessions = SessionTable(self)

    self.stopped = threading.Event()
def _wait_for_server_boot_time(self, fqdn, old_boot_time=None):
    """Poll /api/host/ once a second until ``fqdn`` reports a usable boot_time.

    :param fqdn: host to watch
    :param old_boot_time: when given, only a boot_time later than this is accepted
    :return: the parsed boot_time
    Fails the test via assertLess once TEST_TIMEOUT polls have passed.
    """
    running_time = 0
    while running_time < TEST_TIMEOUT:
        for host in self.get_list("/api/host/"):
            # Skip other hosts, and this host while it has no boot_time yet.
            if host['fqdn'] != fqdn or host['boot_time'] is None:
                continue

            boot_time = IMLDateTime.parse(host['boot_time'])
            if not old_boot_time or boot_time > old_boot_time:
                return boot_time

        running_time += 1
        time.sleep(1)

    # Reaching this point means the loop ran out of time.
    self.assertLess(running_time, TEST_TIMEOUT, "Timed out waiting for host boot_time to be set.")