def test_is_idle(self, mock_pyhorn):
    """is_idle() is truthy with 0 running jobs and falsy with 1."""
    mh = MatterhornController('http://mh.example.edu')

    # (stubbed running-job count, assertion expected to hold)
    scenarios = (
        (0, self.assertTrue),
        (1, self.assertFalse),
    )
    for job_count, check in scenarios:
        mh._stats.running_jobs.return_value = job_count
        node = Mock(mh_host_url="foo")
        check(mh.is_idle(node))
def test_is_in_maintenance(self, mock_pyhorn):
    """is_in_maintenance() mirrors the maintenance flag of the matching host."""
    mh = MatterhornController('http://mh.example.edu')

    active_host = Mock(base_url='foo', maintenance=False)
    parked_host = Mock(base_url='bar', maintenance=True)
    mh._hosts = [active_host, parked_host]

    foo_node = Mock(mh_host_url="foo")
    self.assertFalse(mh.is_in_maintenance(foo_node))

    bar_node = Mock(mh_host_url="bar")
    self.assertTrue(mh.is_in_maintenance(bar_node))
def test_constructor_connect_error(self):
    """Without a patched client the controller is offline and
    verify_connection raises an "Error connecting" exception."""
    mh = MatterhornController('mh.example.edu')

    self.assertFalse(mh.is_online())

    with self.assertRaisesRegexp(MatterhornCommunicationException,
                                 "Error connecting"):
        mh.verify_connection()
def test_constructor_online(self):
    """With MHClient patched, the constructor builds the client with an
    http:// URL plus default credentials/timeout and reports online."""
    client_target = 'moscaler.matterhorn.pyhorn.MHClient'
    expected_kwargs = {'user': None, 'passwd': None, 'timeout': 30}

    with patch(client_target, spec_set=MHClient) as mock_pyhorn:
        mh = MatterhornController('mh.example.edu')

        # The bare hostname is expected to reach the client with a scheme.
        mock_pyhorn.assert_called_once_with(
            'http://mh.example.edu', **expected_kwargs
        )
        self.assertTrue(mh.is_online())
def test_get_host(self, mock_pyhorn):
    """get_host() returns the host whose base_url matches the node's
    mh_host_url."""
    mh = MatterhornController('http://mh.example.edu')
    mh._hosts = [
        Mock(id=1, base_url='foo'),
        Mock(id=2, base_url='bar'),
    ]

    # (node url, id of the host that should be returned)
    for host_url, expected_id in (("foo", 1), ("bar", 2)):
        found = mh.get_host(Mock(mh_host_url=host_url))
        self.assertEqual(found.id, expected_id)
def test_is_registered(self, mock_pyhorn):
    """is_registered() is truthy only for nodes whose url matches a known
    host."""
    mh = MatterhornController('http://mh.example.edu')
    mh._hosts = [Mock(base_url=url) for url in ('foo', 'bar')]

    known_node = Mock(mh_host_url="bar")
    self.assertTrue(mh.is_registered(known_node))

    unknown_node = Mock(mh_host_url="blerg")
    self.assertFalse(mh.is_registered(unknown_node))
def test_constructor_connect_timeout(self):
    """A Timeout from MHClient.me leaves the controller offline, and
    verify_connection surfaces the timeout message."""
    simulated_timeout = Timeout("timeout test")

    with patch('moscaler.matterhorn.pyhorn.MHClient.me',
               side_effect=simulated_timeout):
        mh = MatterhornController('mh.example.edu')

        self.assertFalse(mh.is_online())

        with self.assertRaisesRegexp(MatterhornCommunicationException,
                                     "timeout test"):
            mh.verify_connection()
def __init__(self, cluster, force=False, dry_run=False):
    """Bind the controller to the opsworks stack named *cluster*.

    Creates the boto3 opsworks client and ec2 resource, looks up the
    stack by name, lists its instances, builds a MatterhornController
    from the admin instance's 'PublicDns', and wraps every instance
    record in an OpsworksInstance.

    Raises:
        OpsworksControllerException: if no stack has the given name, or
            no instance hostname starts with 'admin'.
    """
    self.force = force
    self.dry_run = dry_run
    self.opsworks = boto3.client('opsworks')
    self.ec2 = boto3.resource('ec2')

    # First stack whose name matches; for/else raises when none does.
    for candidate in self.opsworks.describe_stacks()['Stacks']:
        if candidate['Name'] == cluster:
            self.stack = candidate
            break
    else:
        raise OpsworksControllerException(
            "No opsworks stack named '%s' found" % cluster
        )

    stack_id = self.stack['StackId']
    described = self.opsworks.describe_instances(StackId=stack_id)
    instance_records = described['Instances']

    # The admin node is identified purely by hostname prefix.
    for record in instance_records:
        if record['Hostname'].startswith('admin'):
            admin_record = record
            break
    else:
        raise OpsworksControllerException("No admin node found")

    self.mhorn = MatterhornController(admin_record['PublicDns'])
    self._instances = [
        OpsworksInstance(record, self) for record in instance_records
    ]
def __init__(self, cluster, force=False, dry_run=False):
    """Set up AWS clients and resolve the stack, admin node and instances.

    Raises:
        OpsworksControllerException: when the named stack does not exist
            or no instance has a hostname beginning with "admin".
    """
    self.force = force
    self.dry_run = dry_run
    self.opsworks = boto3.client("opsworks")
    self.ec2 = boto3.resource("ec2")

    all_stacks = self.opsworks.describe_stacks()["Stacks"]
    found_stack = next((s for s in all_stacks if s["Name"] == cluster), None)
    if found_stack is None:
        raise OpsworksControllerException(
            "No opsworks stack named '%s' found" % cluster
        )
    self.stack = found_stack

    response = self.opsworks.describe_instances(StackId=self.stack["StackId"])
    raw_instances = response["Instances"]

    # Admin node lookup is by hostname prefix only.
    admin_info = next(
        (i for i in raw_instances if i["Hostname"].startswith("admin")),
        None,
    )
    if admin_info is None:
        raise OpsworksControllerException("No admin node found")

    self.mhorn = MatterhornController(admin_info["PublicDns"])
    self._instances = [OpsworksInstance(i, self) for i in raw_instances]
class OpsworksController(object):
    """Controller for one opsworks stack and its Matterhorn admin node.

    Wraps the boto3 opsworks client / ec2 resource, a MatterhornController
    built from the admin instance, and the stack's instances (as
    OpsworksInstance objects). Exposes filtered instance views, a status
    report, and scaling entry points.
    """

    def __init__(self, cluster, force=False, dry_run=False):
        """Resolve the stack named *cluster* and load its instances.

        Args:
            cluster: name of the opsworks stack to control.
            force: stored on the instance as ``self.force``.
            dry_run: when true, start/stop calls are logged but not sent.

        Raises:
            OpsworksControllerException: if the stack is not found or no
                instance hostname starts with 'admin'.
        """
        self.force = force
        self.dry_run = dry_run
        self.opsworks = boto3.client('opsworks')
        self.ec2 = boto3.resource('ec2')

        stacks = self.opsworks.describe_stacks()['Stacks']
        try:
            self.stack = next(x for x in stacks if x['Name'] == cluster)
        except StopIteration:
            raise OpsworksControllerException(
                "No opsworks stack named '%s' found" % cluster
            )

        instances = self.opsworks.describe_instances(
            StackId=self.stack['StackId'])['Instances']
        try:
            # The admin node is identified by hostname prefix.
            mh_admin = next(x for x in instances
                            if x['Hostname'].startswith('admin'))
        except StopIteration:
            raise OpsworksControllerException("No admin node found")

        self.mhorn = MatterhornController(mh_admin['PublicDns'])
        self._instances = [OpsworksInstance(x, self) for x in instances]

    def __repr__(self):
        return "%s (%s)" % (self.__class__, self.stack['Name'])

    @property
    def instances(self):
        """All instances except those reporting is_autoscale()."""
        return [x for x in self._instances if not x.is_autoscale()]

    @property
    def workers(self):
        """Instances reporting is_worker()."""
        return [x for x in self.instances if x.is_worker()]

    @property
    def online_workers(self):
        """Workers reporting is_online()."""
        return [x for x in self.workers if x.is_online()]

    @property
    def pending_workers(self):
        """Workers reporting is_pending()."""
        return [x for x in self.workers if x.is_pending()]

    @property
    def online_or_pending_workers(self):
        """Online workers followed by pending workers."""
        return self.online_workers + self.pending_workers

    @property
    def idle_workers(self):
        """Online workers as filtered by ``self.mhorn.filter_idle``."""
        return self.mhorn.filter_idle(self.online_workers)

    @property
    def stopped_workers(self):
        """Workers reporting is_stopped()."""
        return [x for x in self.workers if x.is_stopped()]

    @property
    def online_instances(self):
        """Instances (worker or not) reporting is_online()."""
        return [x for x in self.instances if x.is_online()]

    @property
    def admin(self):
        """First instance reporting is_admin().

        Raises:
            OpsworksControllerException: if no instance is an admin.
        """
        try:
            return next(x for x in self.instances if x.is_admin())
        except StopIteration:
            raise OpsworksControllerException("No admin node found")

    def status(self):
        """Return a dict summarising the cluster and each worker.

        Top-level keys: cluster, matterhorn_online, instances,
        instances_online, workers, workers_online, workers_pending,
        worker_details, job_status. Each worker_details entry is the
        per-instance dict below updated with ``self.mhorn.node_status(inst)``.
        """
        status = {
            "cluster": self.stack['Name'],
            "matterhorn_online": self.mhorn.is_online(),
            "instances": len(self.instances),
            "instances_online": len(self.online_instances),
            "workers": len(self.workers),
            "workers_online": len(self.online_workers),
            "workers_pending": len(self.pending_workers),
            "worker_details": []
        }
        status['job_status'] = self.mhorn.job_status()

        for inst in self.workers:
            inst_status = {
                "state": inst.Status,
                "opsworks_id": inst.InstanceId,
                "ec2_id": inst.Ec2InstanceId,
                "hostname": inst.Hostname,
                "mh_host_url": inst.mh_host_url,
                "uptime": inst.uptime(),
                "billed_minutes": inst.billed_minutes(),
            }
            inst_status.update(self.mhorn.node_status(inst))
            # if hasattr(inst, 'Ec2InstanceId'):
            #     inst_status['ec2_id'] = inst.Ec2InstanceId
            status['worker_details'].append(inst_status)

        return status

    def actions(self):
        """Summarise which workers were stopped or started.

        Selection is by each worker's ``action_taken`` attribute. Note the
        two lists are rendered differently: stopped workers are joined
        with '; ' using repr, started workers with ', ' using str.
        """
        stopped = [x for x in self.workers if x.action_taken == 'stopped']
        started = [x for x in self.workers if x.action_taken == 'started']
        return {
            'total_stopped': len(stopped),
            'stopped': '; '.join("%r" % x for x in stopped),
            'total_started': len(started),
            'started': ', '.join("%s" % x for x in started)
        }

    def start_instance(self, inst):
        """Log and, unless dry_run is set, start *inst* via opsworks."""
        LOGGER.info("Starting %r", inst)
        if not self.dry_run:
            self.opsworks.start_instance(InstanceId=inst.InstanceId)

    def stop_instance(self, inst):
        """Log and, unless dry_run is set, stop *inst* via opsworks."""
        LOGGER.info("Stopping %r", inst)
        if not self.dry_run:
            self.opsworks.stop_instance(InstanceId=inst.InstanceId)

    def scale_to(self, num_workers, scale_available=False):
        """Scale up or down until *num_workers* are online or pending.

        Raises:
            OpsworksControllerException: if the cluster is already at the
                requested count.
        """
        current_workers = len(self.online_or_pending_workers)

        if current_workers == num_workers:
            raise OpsworksControllerException(
                "Cluster already at %d online or pending workers!"
                % num_workers
            )
        elif current_workers > num_workers:
            self.scale('down', current_workers - num_workers)
        else:
            self.scale('up', num_workers - current_workers, scale_available)

    def scale(self, direction, num_workers=None, scale_available=False):
        """Dispatch a scaling request.

        "up" and "auto" are handled directly. Any other direction runs
        inside the ``self.mhorn.in_maintenance(...)`` context for the
        online workers; within it only "down" does any work.
        ``_scale_up`` / ``_scale_down`` are defined outside this view.
        """
        if direction == "up":
            LOGGER.info("Attempting to scale up %d workers", num_workers)
            self._scale_up(num_workers, scale_available)
        elif direction == "auto":
            LOGGER.info("Initiating auto-scaling")
            self._scale_auto()
        else:
            with self.mhorn.in_maintenance(self.online_workers,
                                           dry_run=self.dry_run):
                if direction == "down":
                    LOGGER.info("Attempting to scale down %d workers",
                                num_workers)
                    self._scale_down(num_workers)

    def _scale_auto(self):
        """Run the autoscaler selected by the AUTOSCALE_TYPE setting.

        Raises:
            OpsworksScalingException: if no type is configured, or the
                autoscaler's scale() call fails.
            OpsworksControllerException: if the autoscaler cannot be
                created.
        """
        autoscale_type = env('AUTOSCALE_TYPE')
        if not autoscale_type:
            raise OpsworksScalingException("No autoscaling type defined")

        from autoscalers import create_autoscaler

        try:
            autoscaler = create_autoscaler(autoscale_type, self)
        except Exception:
            raise OpsworksControllerException(
                "Failed loading autoscale type '%s'" % autoscale_type
            )

        try:
            LOGGER.info("Executing autoscale type: %s", autoscale_type)
            autoscaler.scale()
        # "as" form is valid on Python 2.6+ and 3; "except X, e" is 2-only.
        except Exception as e:
            raise OpsworksScalingException(
                "Autoscale aborted: %s" % str(e)
            )
def test_queued_job_counts(self, mock_pyhorn):
    """queued_job_count() totals QUEUED child jobs, optionally restricted
    to the given operation types."""

    def make_op(op_id, state, *child_statuses):
        # Operations listed without child statuses get no explicit `job`.
        if not child_statuses:
            return Mock(id=op_id, state=state)
        children = [Mock(status=s) for s in child_statuses]
        return Mock(id=op_id, state=state, job=Mock(children=children))

    mh = MatterhornController('mh.example.edu')
    mh.client = MagicMock(spec_set=MHClient)

    first_workflow = Mock(operations=[
        make_op("foo", "RUNNING", "RUNNING", "QUEUED"),
        make_op("foo", "RUNNING", "RUNNING", "QUEUED"),
    ])
    second_workflow = Mock(operations=[
        make_op("foo", "RUNNING", "QUEUED", "QUEUED"),
        make_op("bar", "RUNNING", "QUEUED", "QUEUED"),
        make_op("foo", "INSTANTIATED"),
    ])
    third_workflow = Mock(operations=[
        make_op("foo", "INSTANTIATED"),
        make_op("foo", "RUNNING", "RUNNING", "RUNNING"),
        make_op("bar", "WAITING", "QUEUED", "QUEUED"),
        make_op("baz", "RUNNING", "RUNNING", "QUEUED"),
    ])
    mh.client.workflows.return_value = [
        first_workflow,
        second_workflow,
        third_workflow,
    ]

    self.assertEqual(mh.queued_job_count(), 9)

    # (operation type filter, expected queued count)
    filtered_cases = (
        (["foo"], 4),
        (["bar"], 4),
        (["foo", "bar"], 8),
        (["foo", "bar", "baz"], 9),
    )
    for op_types, expected in filtered_cases:
        self.assertEqual(
            mh.queued_job_count(operation_types=op_types), expected
        )
class OpsworksController(object):
    """Controller for one opsworks stack and its Matterhorn admin node.

    Holds the boto3 opsworks client / ec2 resource, a MatterhornController
    built from the admin instance, and the stack's instances wrapped as
    OpsworksInstance objects. Provides filtered instance views, lookups,
    a status report, and scaling entry points.
    """

    def __init__(self, cluster, force=False, dry_run=False):
        """Resolve the stack named *cluster* and load its instances.

        Args:
            cluster: name of the opsworks stack to control.
            force: stored on the instance as ``self.force``.
            dry_run: when true, start/stop calls are logged but not sent.

        Raises:
            OpsworksControllerException: if the stack is not found or no
                instance hostname starts with "admin".
        """
        self.force = force
        self.dry_run = dry_run
        self.opsworks = boto3.client("opsworks")
        self.ec2 = boto3.resource("ec2")

        stacks = self.opsworks.describe_stacks()["Stacks"]
        try:
            self.stack = next(x for x in stacks if x["Name"] == cluster)
        except StopIteration:
            raise OpsworksControllerException(
                "No opsworks stack named '%s' found" % cluster
            )

        instances = self.opsworks.describe_instances(
            StackId=self.stack["StackId"]
        )["Instances"]
        try:
            # The admin node is identified by hostname prefix.
            mh_admin = next(
                x for x in instances if x["Hostname"].startswith("admin")
            )
        except StopIteration:
            raise OpsworksControllerException("No admin node found")

        self.mhorn = MatterhornController(mh_admin["PublicDns"])
        self._instances = [OpsworksInstance(x, self) for x in instances]

    def __repr__(self):
        return "%s (%s)" % (self.__class__, self.stack["Name"])

    @property
    def instances(self):
        """All instances except those reporting is_autoscale()."""
        return [x for x in self._instances if not x.is_autoscale()]

    @property
    def workers(self):
        """Instances reporting is_worker()."""
        return [x for x in self.instances if x.is_worker()]

    @property
    def online_workers(self):
        """Workers reporting is_online()."""
        return [x for x in self.workers if x.is_online()]

    @property
    def pending_workers(self):
        """Workers reporting is_pending()."""
        return [x for x in self.workers if x.is_pending()]

    @property
    def online_or_pending_workers(self):
        """Online workers followed by pending workers."""
        return self.online_workers + self.pending_workers

    @property
    def idle_workers(self):
        """Online workers as filtered by ``self.mhorn.filter_idle``."""
        return self.mhorn.filter_idle(self.online_workers)

    @property
    def stopped_workers(self):
        """Workers reporting is_stopped()."""
        return [x for x in self.workers if x.is_stopped()]

    @property
    def online_instances(self):
        """Instances (worker or not) reporting is_online()."""
        return [x for x in self.instances if x.is_online()]

    @property
    def admin(self):
        """First instance reporting is_admin().

        Raises:
            OpsworksControllerException: if no instance is an admin.
        """
        try:
            return next(x for x in self.instances if x.is_admin())
        except StopIteration:
            raise OpsworksControllerException("No admin node found")

    def get_layer_id(self, layer_name):
        """Return the LayerId of the stack layer named *layer_name*.

        Raises:
            OpsworksControllerException: if no layer has that name.
        """
        layers = self.opsworks.describe_layers(
            StackId=self.stack["StackId"]
        )["Layers"]
        try:
            layer = next(x for x in layers if x["Name"] == layer_name)
            return layer["LayerId"]
        except StopIteration:
            raise OpsworksControllerException(
                "Could not find layer '%s'" % layer_name
            )

    def get_ec2_id(self, instance_name):
        """Return Ec2InstanceId of the instance whose Hostname matches.

        NOTE(review): unlike the other lookups in this class, a miss
        propagates a bare StopIteration rather than an
        OpsworksControllerException. Left as-is so existing callers see
        the same exception type; consider aligning it.
        """
        return next(
            x.Ec2InstanceId for x in self.instances
            if x.Hostname == instance_name
        )

    def status(self):
        """Return a dict summarising the cluster and each worker.

        Top-level keys: cluster, matterhorn_online, instances,
        instances_online, workers, workers_online, workers_pending,
        worker_details, job_status. Each worker_details entry is the
        per-instance dict below updated with ``self.mhorn.node_status(inst)``.
        """
        status = {
            "cluster": self.stack["Name"],
            "matterhorn_online": self.mhorn.is_online(),
            "instances": len(self.instances),
            "instances_online": len(self.online_instances),
            "workers": len(self.workers),
            "workers_online": len(self.online_workers),
            "workers_pending": len(self.pending_workers),
            "worker_details": [],
        }
        status["job_status"] = self.mhorn.job_status()

        for inst in self.workers:
            inst_status = {
                "state": inst.Status,
                "opsworks_id": inst.InstanceId,
                "ec2_id": inst.Ec2InstanceId,
                "hostname": inst.Hostname,
                "mh_host_url": inst.mh_host_url,
                "uptime": inst.uptime(),
                "billed_minutes": inst.billed_minutes(),
            }
            inst_status.update(self.mhorn.node_status(inst))
            # if hasattr(inst, 'Ec2InstanceId'):
            #     inst_status['ec2_id'] = inst.Ec2InstanceId
            status["worker_details"].append(inst_status)

        return status

    def actions(self):
        """Summarise which workers were stopped or started.

        Selection is by each worker's ``action_taken`` attribute. Note the
        two lists are rendered differently: stopped workers are joined
        with "; " using repr, started workers with ", " using str.
        """
        stopped = [x for x in self.workers if x.action_taken == "stopped"]
        started = [x for x in self.workers if x.action_taken == "started"]
        return {
            "total_stopped": len(stopped),
            "stopped": "; ".join("%r" % x for x in stopped),
            "total_started": len(started),
            "started": ", ".join("%s" % x for x in started),
        }

    def start_instance(self, inst):
        """Log and, unless dry_run is set, start *inst* via opsworks."""
        LOGGER.info("Starting %r", inst)
        if not self.dry_run:
            self.opsworks.start_instance(InstanceId=inst.InstanceId)

    def stop_instance(self, inst):
        """Log and, unless dry_run is set, stop *inst* via opsworks."""
        LOGGER.info("Stopping %r", inst)
        if not self.dry_run:
            self.opsworks.stop_instance(InstanceId=inst.InstanceId)

    def scale_to(self, num_workers, scale_available=False):
        """Scale up or down until *num_workers* are online or pending.

        Raises:
            OpsworksControllerException: if the cluster is already at the
                requested count.
        """
        current_workers = len(self.online_or_pending_workers)

        if current_workers == num_workers:
            raise OpsworksControllerException(
                "Cluster already at %d online or pending workers!"
                % num_workers
            )
        elif current_workers > num_workers:
            self.scale("down", current_workers - num_workers)
        else:
            self.scale("up", num_workers - current_workers, scale_available)

    def scale(self, direction, num_workers=None, scale_available=False):
        """Dispatch a scaling request.

        "up" is handled directly. Any other direction runs inside the
        ``self.mhorn.in_maintenance(...)`` context for the online workers;
        within it only "down" does any work. ``_scale_up`` /
        ``_scale_down`` are defined outside this view.
        """
        if direction == "up":
            LOGGER.info("Attempting to scale up %d workers", num_workers)
            self._scale_up(num_workers, scale_available)
        else:
            with self.mhorn.in_maintenance(self.online_workers,
                                           dry_run=self.dry_run):
                if direction == "down":
                    LOGGER.info("Attempting to scale down %d workers",
                                num_workers)
                    self._scale_down(num_workers)

    def autoscale(self, settings):
        """Build an Autoscaler for this controller and execute it.

        Raises:
            OpsworksScalingException: wrapping any exception raised while
                logging or executing the autoscaler.
        """
        autoscaler = Autoscaler(self, settings)
        try:
            LOGGER.info("Executing autoscaler")
            autoscaler.execute()
        # "as" form is valid on Python 2.6+ and 3; "except X, e" is 2-only.
        except Exception as e:
            raise OpsworksScalingException("Autoscale aborted: %s" % str(e))