def destroy_instance(self, instance: GenericCloudInstance):
    """Drop the first entry in the in-memory dummy pool whose id matches *instance*."""
    log.info(f"Dummy destroying instance {instance}")
    # Locate the matching pool index lazily; None means no match.
    match = next(
        (idx for idx, srv in enumerate(cloud_instances) if srv.id == instance.id),
        None,
    )
    if match is not None:
        log.info(f"Dummy instance {instance} has id {cloud_instances[match].id}")
        cloud_instances.pop(match)
def create(source: str) -> PolicyAdapter:
    """Look up and instantiate the policy adapter registered under *source*.

    Raises:
        NotImplementedError: when no adapter is registered for *source*
            (chained from the underlying KeyError).
    """
    log.info("Instantiate policy adapter %s", source)
    try:
        adapter_class = PolicyAdapterFactory.ADAPTERS[source]
    except KeyError as ex:
        raise NotImplementedError(f"{ex} not implemented") from ex
    return adapter_class()
def get_current_instances(self) -> List[DigitalOceanCloudInstance]:
    """Return this group's droplets, wrapped and sorted oldest-first.

    Bug fix: the tag was built from the ``filter`` builtin instead of
    ``self.filter``, producing a query string like ``scalr=<built-in
    function filter>`` that could never match the group's droplets.

    NOTE(review): this queries ``scalr=<filter>`` while deploy_instance
    creates the tag ``scalr:<filter>`` — one of the two delimiters looks
    wrong; confirm against the DigitalOcean tag naming rules.
    """
    filter_tag = f"scalr={self.filter}"
    log.info(f"digitalocean: Querying with filter_tag: {filter_tag}")
    droplets = self.client.get_all_droplets(tag_name=filter_tag)
    return [
        DigitalOceanCloudInstance(droplet)
        for droplet in sorted(droplets, key=lambda i: i.created_at)
    ]
def create(name: str) -> CloudAdapter:
    """Look up and instantiate the cloud adapter registered under *name*.

    Raises:
        NotImplementedError: when no adapter is registered for *name*.

    Fix: chain the NotImplementedError from the underlying KeyError
    (``from ex``) so the original cause stays visible in tracebacks,
    matching PolicyAdapterFactory.create.
    """
    try:
        log.info("Instantiate cloud adapter %s", name)
        obj_class = CloudAdapterFactory.ADAPTERS[name]
        return obj_class()
    except KeyError as ex:
        raise NotImplementedError(f"{ex} not implemented") from ex
def cooldown(self) -> None:
    """Pause for the configured cooldown period; no-op on dry runs."""
    if self.config.dry_run:
        return
    log.info(f"Cooling down for {self.config.cooldown_timeout}s")
    # One sleep replaces the original second-at-a-time loop with its unused
    # index; sleep() is interruptible either way, so nothing is lost.
    time.sleep(self.config.cooldown_timeout)
    log.info("Cooldown finished")
def get_current_instances(self) -> List[HcloudCloudInstance]:
    """Return this group's Hetzner Cloud servers, wrapped and sorted oldest-first."""
    selector = f"scalr={self.filter}"
    log.info(f"hcloud: Querying with filter_tag: {selector}")
    matched = self.hcloud.servers.get_all(label_selector=selector)
    oldest_first = sorted(matched, key=lambda srv: srv.created)
    return [HcloudCloudInstance(server=srv) for srv in oldest_first]
def ensure_instances_running(self) -> None:
    """Power on every droplet in this group that is currently off."""
    log.info("digitalocean: ensure running")
    for inst in self.get_current_instances():
        droplet = inst.droplet
        log.info(
            f"digitalocean: instance {droplet.name} status {droplet.status}"
        )
        if droplet.status == "off":
            droplet.power_on()
def deploy_instance(self, name: str) -> None:
    """Create a new vultr instance labelled *name* and tagged for this group."""
    log.info(f"vultr: Deploying new instance named {name}")
    # Work on a copy so the shared launch template is never mutated.
    params = dict(self.launch)
    params["label"] = name
    params["hostname"] = name
    params["tag"] = f"scalr={self.filter}"
    self.vultr.create_instance(**params)
def get_current_instances(self) -> List[GenericCloudInstance]:
    """Return this group's vultr instances as generic records, oldest first."""
    tag = f"scalr={self.filter}"
    log.info(f"vultr: Querying with filter_tag: {tag}")
    records = self.vultr.list_instances(tag=tag)
    instances = []
    for rec in sorted(records, key=lambda r: r["date_created"]):
        instances.append(
            GenericCloudInstance(
                id=rec["id"],
                name=rec["label"],
                status=rec["power_status"],
            )
        )
    return instances
def get_current_instances(self) -> List[GenericCloudInstance]:
    """Return this group's cloudscale servers as generic records, oldest first."""
    tag = f"scalr={self.filter}"
    log.info(f"cloudscale: Querying with filter_tag: {tag}")
    found = self.cloudscale.server.get_all(filter_tag=tag)
    by_age = sorted(found, key=lambda rec: rec["created_at"])
    return [
        GenericCloudInstance(id=rec["uuid"], name=rec["name"], status=rec["status"])
        for rec in by_age
    ]
def get_current(self) -> float:
    """Run the configured PromQL query and return the latest sample as a float.

    Raises a plain Exception when the query yields no result.
    """
    prom = PrometheusConnect(
        url=self.config.get("url", "http://localhost:9090"),
        disable_ssl=self.config.get("disable_ssl", True),
    )
    result = prom.custom_query(query=self.query)
    if not result:
        log.error("Prometheus query: no result")
        raise Exception("Prometheus query: no result")
    log.info(f"Prometheus query result: {result}")
    # Each sample's "value" holds [timestamp, value]; take the value part.
    latest = result[0].get("value")[-1]
    return float(latest)
def get_scaling_factor(self) -> float:
    """Return the target/current ratio for the policy metric.

    Falls back to 1 (no scaling) when the metric cannot be read or the
    current value is zero — the safe choice for a best-effort backend.
    """
    try:
        measured = self.get_current()
    except Exception as ex:
        log.error(ex)
        return 1
    log.info("Current metric: %s", measured)
    log.info("Target: %s", self.target)
    try:
        return self.target / measured
    except ZeroDivisionError:
        return 1
def deploy_instance(self, name: str) -> None:
    """Create a new cloudscale server named *name*, tagged for this group.

    Bug fix: ``self.launch.copy()`` is shallow, so updating the nested
    "tags" dict in place leaked the "scalr" tag back into ``self.launch``
    (and persisted across calls). The tags dict is now copied before the
    update so the shared launch template is never mutated.
    """
    log.info(f"cloudscale: Deploying instance with name {name}")
    launch_config = self.launch.copy()
    # Copy the nested dict — a shallow top-level copy still shares it.
    tags = dict(launch_config.get("tags", dict()))
    tags.update({"scalr": self.filter})
    launch_config.update(
        {
            "name": name,
            "tags": tags,
        }
    )
    self.cloudscale.server.create(**launch_config)
def scale(self, diff: int, cloud: CloudAdapter) -> None:
    """Apply a scaling decision: positive *diff* adds instances, negative removes.

    Raises a plain Exception when the configured min exceeds max. After a
    real (non-dry) run, makes sure surviving instances are powered on.
    """
    minimum, maximum = self.config.min, self.config.max
    if minimum > maximum:
        raise Exception(f"Error: min {minimum} > max {maximum}")
    if diff > 0:
        self.scale_up(diff, cloud)
    elif diff < 0:
        self.scale_down(-diff, cloud)
    else:
        log.info("No scaling action taken")
    if not self.config.dry_run:
        cloud.ensure_instances_running()
def deploy_instance(self, name) -> None:
    """Create a new Hetzner Cloud server named *name*, labelled for this group.

    Bug fix: ``self.launch.copy()`` is shallow, so updating the nested
    "labels" dict in place leaked the "scalr" label back into
    ``self.launch`` (and persisted across calls). The labels dict is now
    copied before the update so the shared launch template is never mutated.
    """
    log.info(f"hcloud: Deploying instance with name {name}")
    launch_config = self.launch.copy()
    # Copy the nested dict — a shallow top-level copy still shares it.
    labels = dict(launch_config.get("labels", dict()))
    labels.update({"scalr": self.filter})
    params = {
        "name": name,
        "labels": labels,
        "server_type": ServerType(launch_config["server_type"]),
        "image": Image(launch_config["image"]),
        "ssh_keys": [SSHKey(ssh_key) for ssh_key in launch_config["ssh_keys"]],
        "location": Location(launch_config["location"]),
        "user_data": launch_config["user_data"],
    }
    self.hcloud.servers.create(**params)
def get_current(self) -> float:
    """Fetch the metric value from the configured HTTP endpoint.

    Retries up to 3 times (2s apart); raises when all attempts fail.
    Returns the value under the configured JSON key, or -1 when the key
    is missing from the response body.

    Bug fix: ``r.raise_for_status`` was referenced without calling it, so
    HTTP error responses (4xx/5xx) were silently treated as success.
    """
    url = self.query
    log.info("Gather metrics from: %s", url)
    headers = self.config.get("headers", dict())
    timeout = self.config.get("timeout", 60)
    key = self.config.get("key", "data")
    retries = 3
    while retries > 0:
        try:
            r = requests.get(url, headers=headers, timeout=timeout)
            r.raise_for_status()  # was missing the call parentheses
            return r.json().get(key, -1)
        except Exception as ex:
            # Best-effort retry loop: log, back off, try again.
            log.error(ex)
            retries -= 1
            time.sleep(2)
    # All retries exhausted without a successful response.
    raise Exception("Error: Max retries reached")
def get_current_instances(self) -> List[GenericCloudInstance]:
    """Return this group's cloudstack VMs as generic records, oldest first."""
    filter_tag = f"scalr={self.filter}"
    log.info(f"cloudstack: Querying with filter_tag: {filter_tag}")
    # cloudstack filters by structured key/value tags, not the tag string.
    tag_query = [{"key": "scalr", "value": self.filter}]
    machines = self.cs.listVirtualMachines(tags=tag_query, fetch_list=True)
    oldest_first = sorted(machines, key=lambda vm: vm["created"])
    return [
        GenericCloudInstance(
            id=vm["id"],
            name=vm["name"],
            status=vm["state"].lower(),
        )
        for vm in oldest_first
    ]
def ensure_instances_running(self) -> None:
    """Start any cloudscale server in this group that is stopped."""
    log.info("cloudscale: ensure running")
    for inst in self.get_current_instances():
        log.info(f"cloudscale: instance, {inst.name} status {inst.status}")
        if inst.status != "stopped":
            continue
        self.cloudscale.server.start(uuid=inst.id)
        log.info(f"cloudscale: Instance {inst.name} started")
def deploy_instance(self, name: str) -> None:
    """Create a new droplet named *name* and tag it for this scaling group.

    Fix: removed a dead ``launch_config.update`` whose label/hostname/tag
    keys (copied from the vultr adapter) were never read by this method.

    NOTE(review): the tag is created as ``scalr:<filter>`` while
    get_current_instances queries ``scalr=<filter>`` — one of the two
    delimiters looks wrong; confirm against the DigitalOcean tag rules
    before changing either string.
    """
    log.info(f"digitalocean: Deploying instance with name {name}")
    launch_config = self.launch.copy()
    droplet = digitalocean.Droplet(
        name=name,
        region=launch_config["region"],
        image=launch_config["image"],
        size_slug=launch_config["size"],
        ssh_keys=launch_config["ssh_keys"],
        user_data=launch_config.get("user_data", ""),
        ipv6=launch_config.get("ipv6", False),
    )
    droplet.create()
    tag = digitalocean.Tag(name=f"scalr:{self.filter}")
    tag.create()
    tag.add_droplets([droplet.id])
    log.info(f"Creating droplet {name}")
def scale_up(self, diff: int, cloud: CloudAdapter):
    """Create *diff* new instances through *cloud* (log-only on dry runs)."""
    log.info(f"Scaling up {diff}")
    for _ in range(diff):
        instance_name = self.get_unique_name(prefix=self.config.name)
        if self.config.dry_run:
            log.info(f"Dry run creating instance {instance_name}")
        else:
            log.info(f"Creating instance {instance_name}")
            cloud.deploy_instance(name=instance_name)
def main() -> None:
    """CLI entry point: run one scaling pass, or loop on a schedule with --periodic."""
    parser = ArgumentParser()
    parser.add_argument(
        "--periodic",
        help="run periodic",
        action="store_true",
        default=bool(os.environ.get("SCALR_PERIODIC", False)),
    )
    parser.add_argument(
        "--interval",
        help="set interval in seconds",
        type=int,
        default=int(os.environ.get("SCALR_INTERVAL", 60)),
    )
    parser.add_argument("--version", help="show version", action="store_true")
    args = parser.parse_args()

    if args.version:
        print(f"version {__version__}")
        sys.exit(0)

    log.info(f"Starting, version {__version__}")

    if not args.periodic:
        app_once()
        return

    try:
        # Expose prometheus metrics for as long as the periodic loop runs.
        start_http_server(
            int(os.environ.get("SCALR_PROMETHEUS_EXPORTER_PORT", 8000)))
        log.info(f"Running periodic in intervals of {args.interval}s")
        schedule.every(args.interval).seconds.do(app_once)
        time.sleep(1)
        # Run once immediately, then keep to the schedule.
        schedule.run_all()
        while True:
            schedule.run_pending()
            time.sleep(1)
    except KeyboardInterrupt:
        print("")
        log.info("Stopping...")
        schedule.clear()
        log.info("done")
def scale_down(self, diff: int, cloud: CloudAdapter):
    """Destroy *diff* instances chosen by the configured selection strategy.

    NOTE(review): the candidate list is fetched once and not refreshed after
    each destroy — presumably select_instance removes its pick from
    current_servers; confirm, otherwise the same instance could be chosen
    twice.
    """
    log.info(f"Scaling down {diff}")
    candidates = cloud.get_current_instances()
    for _ in range(diff):
        victim = self.select_instance(
            strategy=self.config.scale_down_selection,
            current_servers=candidates,
        )
        if self.config.dry_run:
            log.info(f"Dry run deleting instance {victim}")
        else:
            log.info(f"Deleting instance {victim}")
            cloud.destroy_instance(instance=victim)
def ensure_instances_running(self) -> None:
    """Power on any Hetzner server in this group that is off or stopping."""
    log.info("hcloud: ensure running")
    for inst in self.get_current_instances():
        server = inst.server
        log.info(
            f"hcloud: instance {server.name} status {server.status}"
        )
        if server.status not in ("off", "stopping"):
            continue
        try:
            self.hcloud.servers.power_on(server)
            log.info(f"hcloud: Instance {server.name} started")
        except APIException as e:
            # Power-on can race with provider-side state changes; log and move on.
            log.error(e)
def ensure_instances_running(self) -> None:
    """Start any vultr instance in this group that is stopped.

    Instances in states other than "running" or "stopped" are left alone.
    """
    log.info("vultr: ensure running")
    for inst in self.get_current_instances():
        log.info(
            f"vultr: instance {inst.name} status {inst.status}")
        if inst.status != "stopped":
            continue
        try:
            self.vultr.start_instance(instance_id=inst.id)
            log.info(f"vultr: Instance {inst.name} started")
        except Exception as ex:
            # Best-effort: a failed start is logged, not fatal.
            log.error(ex)
def app_once() -> None:
    """Execute one scaling run: load config, measure, decide, act, report."""
    log.info("Start scaling run")
    cfg = ScalingConfig.parse_file(os.getenv("SCALR_CONFIG", "config.yml"))

    # Publish the static configuration through the prometheus exporter.
    metric_min.set(cfg.min)
    metric_max.set(cfg.max)
    metric_max_step_down.set(cfg.max_step_down)
    metric_dry_run.state("on" if cfg.dry_run else "off")
    metric_enabled.state("yes" if cfg.enabled else "no")
    metric_cooldown_timeout.set(cfg.cooldown_timeout)

    if not cfg.enabled:
        log.info("Not enabled, skipping...")
        return

    cloud = CloudAdapterFactory.create(cfg.cloud.kind)
    cloud.configure(
        filter=cfg.name,
        launch=cfg.cloud.launch_config,
    )

    scalr = Scalr(config=cfg)
    factor = scalr.get_factor(policy_configs=cfg.policies)
    metric_factor.set(factor)

    current_size = len(cloud.get_current_instances())
    metric_current.set(current_size)

    diff = scalr.calc_diff(factor=factor, current_size=current_size)
    metric_desired.set(scalr.desired)
    scalr.scale(diff=diff, cloud=cloud)

    if diff:
        # Re-measure after scaling so the exporter reflects the new size,
        # then honour the cooldown before the next run may act again.
        metric_current.set(len(cloud.get_current_instances()))
        scalr.cooldown()
    log.info("End scaling run")
def deploy_instance(self, name: str):
    """Add a stopped dummy instance with a random id to the in-memory pool."""
    log.info(f"Dummy deploying instance with name {name}")
    new_id = f"{uuid.uuid4()}"
    cloud_instances.append(
        GenericCloudInstance(id=new_id, name=name, status="stopped"),
    )
def ensure_instances_running(self):
    """Mark every dummy instance in the pool as running."""
    log.info("Dummy ensure running")
    for srv in cloud_instances:
        if srv.status == "running":
            continue
        log.info(f"Dummy start {srv.name}")
        srv.status = "running"
def get_current_instances(self) -> List[GenericCloudInstance]:
    """Return the shared in-memory pool of dummy instances.

    Fix: the log message claimed "two instances", but the pool grows and
    shrinks via deploy_instance/destroy_instance; log the actual count.
    """
    log.info(
        f"Dummy returning {len(cloud_instances)} instances, filtered by {self.filter}"
    )
    return cloud_instances
def destroy_instance(self, instance: GenericCloudInstance) -> None:
    """Tear down *instance* on vultr, identified by its provider id."""
    log.info(f"vultr: Destroying instance {instance}")
    target_id = instance.id
    self.vultr.delete_instance(instance_id=target_id)
def destroy_instance(self, instance: HcloudCloudInstance) -> None:
    """Tear down the Hetzner Cloud server backing *instance*."""
    log.info(f"hcloud: Destroying instance {instance}")
    server = instance.server
    self.hcloud.servers.delete(server)