def send_server_metric(self, name, value, hostname=None):
    ''' Send a server-prefixed metric to graphite.

    Builds the metric path from the server prefix and name; the final
    wire message (including the timestamp) is produced by send_metric().
    '''
    metric_name = '%s.%s' % (self.get_server_prefix(hostname), name)
    message = '%s %s' % (metric_name, value)
    if self.debug:
        # Debug mode: log what would be sent, do not touch the socket.
        logging.info(message)
        return True
    # Bug fix: the old code built '%s.%s%s' (name and value run together
    # with no separator) and passed that single string to
    # self.send_metric(), whose signature is send_metric(name, value).
    # Pass the path and value separately so send_metric can append the
    # timestamp itself.
    return self.send_metric(metric_name, value)
def delete_snapshot(self, snap_id):
    ''' Delete a snapshot '''
    logging.info('Deleting snapshot: %s' % snap_id)
    try:
        result = self.conn.delete_snapshot(snap_id)
    except EC2ResponseError as err:
        # EC2 rejected the request; report it and signal failure.
        logging.error(err.message)
        return None
    return result
def create_snapshot(self, vol_id, desc):
    """ Creates an EBS Snapshot.

    Returns the new snapshot object, or None when the EC2 call fails.
    """
    logging.info('Creating Snapshot from %s' % vol_id)
    try:
        snap = self.conn.create_snapshot(vol_id, description=desc)
    except EC2ResponseError as msg:
        logging.error(msg.message)
        return None
    # Bug fix: the success path previously fell off the end of the
    # function and implicitly returned None, so callers could not tell
    # success from failure. Log and return the snapshot, matching the
    # complete copy of this method elsewhere in this file.
    logging.info('Snapshot created %s: %s' % (snap.id, desc))
    return snap
def create_ebs(self, used_az, host, instance_id):
    """ Create new EBS volumes. """
    ebs_helper = c3.aws.ec2.ebs.C3EBS(self.conn)
    for cfg in self.cconfig.get_ebs_config():
        device = cfg["device"]
        logging.info("Creating EBS volume %s for %s" % (device, host))
        vol = ebs_helper.create_volume(
            cfg["size"], used_az, cfg["type"], cfg["iops"])
        # pylint: disable=maybe-no-member
        # Remember which instance and device each new volume belongs to
        # so a later attach step can wire them up.
        self.volume_instances[vol.id] = instance_id
        self.volume_devices[vol.id] = device
def attach_ebs(self):
    """ Attaches EBS volumes to instances. """
    ebs_helper = c3.aws.ec2.ebs.C3EBS(self.conn)
    for vol_id in self.volume_instances:
        inst_id = self.volume_instances[vol_id]
        device = self.volume_devices[vol_id]
        logging.info(
            "Attaching EBS volume %s on %s to %s" %
            (vol_id, inst_id, device))
        ebs_helper.attach_volume(vol_id, inst_id, device)
        # Ensure the volume is cleaned up when the instance terminates.
        ebs_helper.set_ebs_del_on_term(inst_id, device)
def cluster_retag(self):
    """ Retag the cluster.

    Terminal CLI action: exits 0 on success, 1 on tagging failure.
    """
    logging.info("Retagging cluster %s" % self.cconfig.get_primary_sg())
    cgc = self.cluster()
    tagger = cluster_tagger(self.conn, verbose=self.opts.verbose)
    if not tagger.add_tags(cgc.get_instance_ids(),
                           self.cconfig.get_tagset()):
        # Typo fix in the error message: "addings" -> "adding".
        logging.error("Problem adding tags")
        sys.exit(1)
    logging.info("Retag cluster complete")
    sys.exit(0)
def tag_s3_bucket(self, rid, tagset):
    ''' Tags S3 buckets with complicated s3 tagging shenanigans '''
    # new_tags collects the tag set to apply; failed counts API errors.
    # NOTE(review): the visible portion of this method ends right after
    # the get_bucket error handling -- the tagging logic that actually
    # uses new_tags/tagset/bucket appears truncated in this chunk.
    new_tags = list()
    failed = 0
    logging.info('Tagging S3 bucket %s' % rid)
    try:
        bucket = self.conn.get_bucket(rid)
    except S3ResponseError, msg:
        # Bucket lookup failed; record the error and count the failure.
        logging.error(msg.message)
        failed += 1
def create_elb(self):
    ''' Create an ELB.

    On success sets self.elb to the new load balancer and marks
    self.created True; on API error only logs the failure.
    '''
    # Bug fix: the old call passed self.verbose as an extra positional
    # argument to logging.debug(); logging treats positional args as
    # lazy %-format arguments, and with no free placeholders left the
    # record formatting breaks. The stray argument is dropped.
    logging.debug('Create ELB %s' % self.name)
    try:
        self.elb = self.conn.create_load_balancer(
            self.name, self.azs_used, self.elb_listeners)
        logging.info('Created %s: %s' % (self.name, self.elb))
        self.created = True
    except BotoServerError as msg:
        logging.error(msg.message)
def init_elb(self):
    ''' Initialize ELB, create if does not exist by default '''
    try:
        # Boto raises/returns empty for unknown names; [0] raises
        # IndexError when no ELB with this name exists.
        self.elb = self.conn.get_all_load_balancers(
            load_balancer_names=[self.name])[0]
        # NOTE(review): self.verbose is passed as an extra positional
        # arg to logging.debug(), which logging treats as a %-format
        # argument -- the already-formatted message has no free
        # placeholders, so the record is mangled.
        logging.debug('Found existing ELB: %s' % self.name, self.verbose)
    except (IndexError, BotoServerError), msg:
        # IndexError: no ELB by that name; BotoServerError: API error.
        if self.find_only:
            logging.info('%s does not exist' % self.name)
            return False
        # NOTE(review): the create-on-miss branch implied by the
        # docstring is not visible here -- this chunk looks truncated.
def set_ebs_del_on_term(self, instance_id, device):
    """ Set volumes to delete upon termination """
    # blockDeviceMapping attribute entries take the form 'device=true'.
    mapping = ['%s=true' % device]
    logging.info(
        "Setting delete attribute for %s on %s" % (mapping, instance_id))
    try:
        self.conn.modify_instance_attribute(
            instance_id, 'blockDeviceMapping', mapping)
    except EC2ResponseError as err:
        logging.error(err.message)
        return None
    return True
def set_hc(self):
    ''' Ensure HC is set for ELB '''
    # Build the health check from the configured target and thresholds.
    health_check = HealthCheck(
        self.conf.hc_access_point,
        self.conf.hc_interval,
        self.conf.hc_target,
        self.conf.hc_healthy_threshold,
        self.conf.hc_unhealthy_threshold)
    logging.info('Configuring HC: %s' % health_check)
    try:
        self.elb.configure_health_check(health_check)
    except BotoServerError as err:
        logging.error(err.message)
def wait_for_servers(self, servers, start_time, success):
    """ Wait for servers that didn't fail to start to start up.

    Polls each host's state (1 = pending, 0 = running, 2 = failed)
    until it leaves pending or the launch timeout expires. Exits the
    process when every server failed; otherwise reports counts and,
    when an ELB is configured, registers the used AZs with it.
    """
    started = 0
    failed = 0
    azs_used = list()
    # Message fixes: "server(s)to" -> "server(s) to".
    logging.info("Waiting for %d server(s) to start" % success)
    stime = self.cconfig.get_sleep_step()
    for host in self.hostnames:
        status = servers[host].analyze_state()
        while status == 1:
            if check_timed_out(start_time, self.cconfig.get_launch_timeout(),
                               verbose=self.opts.verbose):
                logging.error("%s failed to start before time out" % host)
                break
            # Typo fix: "Wating" -> "Waiting".
            logging.info("Waiting for %s to enter running state" % host)
            time.sleep(stime)
            status = servers[host].analyze_state()
        if status == 0:
            started += 1
            # Track distinct AZs so the ELB can be configured for them.
            if servers[host].get_az() not in azs_used:
                azs_used.append(servers[host].get_az())
            logging.info("%s is running" % host)
        elif status == 2:
            failed += 1
        # Bug fix: self.opts.verbose was passed as a stray positional
        # argument to logging.debug(), corrupting lazy %-formatting.
        logging.debug("%d started, %d failed, of %d total" %
                      (started, failed, success))
    if failed == success:
        logging.error("All %d server(s) failed to start" % failed)
        sys.exit(1)
    elif started != success:
        logging.error("%d started, %d failed to start" % (started, failed))
    else:
        logging.info("%d of %d started" % (started, success))
    if self.cconfig.elb.enabled and azs_used:
        self.setup_elb(servers, azs_used)
def __init__(self, conn, name, find_only=False, verbose=False):
    """ Look up the named security group; create it unless find_only. """
    self.conn = conn
    self.name = name
    self.sgrp = None
    self.find_only = find_only
    self.verbose = verbose
    try:
        self.sgrp = conn.get_all_security_groups(name)[0]
    except (IndexError, EC2ResponseError) as msg:
        # IndexError: no group by that name; EC2ResponseError: API error.
        if self.find_only:
            logging.error(msg.message)
        else:
            logging.info("Creating SG %s" % self.name)
            self.sgrp = self.create()
def hibernate(self):
    ''' Hibernate instances in cluster. '''
    stopped = 0
    for inst in self.c3instances:
        if not inst.hibernate():
            continue
        logging.info('Waiting for %s to stop' % inst.name)
        if wait_for_instance(inst, desired_state='down',
                             verbose=self.verbose):
            stopped += 1
    total = len(self.c3instances)
    # Warn when some instances did not reach the stopped state.
    if stopped != total:
        logging.warn(
            'Asked for %d but only %d stopped' % (total, stopped))
    return stopped
def re_associate_eip(self, steal=False, eip=None):
    ''' Reassociates an EIP address to an EC2 instance.

    Looks up an EIP via the node database when one is configured,
    otherwise uses the supplied eip. Stores the result in self.reeip
    (None when no EIP is available).
    '''
    if self.node_db:
        # Bug fix: logging.debug was being passed self.verbose as an
        # extra positional argument, which logging interprets as a
        # lazy %-format argument and mangles the record. Dropped here
        # and on the debug call below.
        logging.debug('Checking for EIP')
        eip = self.get_eip(steal)
    else:
        logging.info('No external data source is defined, skipping')
    if eip:
        logging.debug(
            'Will re-associate EIP %s to %s' % (eip.public_ip, self.inst_id))
        self.reeip = eip
    else:
        self.reeip = None
def wake(self):
    ''' Wake instances in cluster.

    Returns the number of instances confirmed running.
    '''
    count = 0
    for iid in self.c3instances:
        if iid.wake():
            logging.info('Waiting for %s to start' % iid.name)
            if wait_for_instance(iid, verbose=self.verbose):
                # Bug fix: self.verbose was passed as a stray positional
                # argument to logging.debug(), corrupting the record's
                # lazy %-formatting.
                logging.debug('Wait for %s successful' % iid.name)
                count += 1
    if count != len(self.c3instances):
        logging.warn(
            'Asked for %d but only %d started' %
            (len(self.c3instances), count))
    return count
def send_metric(self, name, value):
    ''' Send custom path metric to graphite. '''
    # Graphite plaintext protocol: "<path> <value> <unix-timestamp>".
    message = "%s %s %s" % (name, value, int(time.time()))
    if self.debug:
        # Debug mode: log the message instead of sending it.
        logging.info(message)
        return True
    # Lazily (re)connect before sending.
    if not self._sock:
        self.connect()
    if self._sock_status:
        logging.info("Sending %s %s to %s" % (name, value, self.server))
        try:
            self._sock.sendall(message + "\n")
        except socket.gaierror, msg:
            # Address/DNS failure: drop the socket so the next call
            # reconnects, and report failure.
            logging.error(msg)
            self._sock = None
            return False
    # NOTE(review): the success path falls through and implicitly
    # returns None; a 'return True' after sendall appears to be missing
    # or truncated from this chunk.
def cluster_destroy(self):
    """ Destroy mode, delete all components for this cluster.

    Terminates instances, deletes the ELB when configured, removes the
    primary security group, then exits 0.
    """
    logging.info("Tearing down %s in %s" % (self.cconfig.get_primary_sg(),
                                            self.cconfig.get_aws_region()))
    cgc = self.cluster()
    count = cgc.destroy()
    logging.info("Terminated %d instance(s)" % count)
    if self.cconfig.elb.enabled:
        c3elb = self.elb_connection()
        if c3elb.destroy():
            logging.info("ELB %s deleted" % c3elb.name)
        else:
            # Bug fix: the failure message had a %s placeholder but no
            # argument, so it logged a literal '%s'. Supply the name.
            logging.error("Deleting ELB %s failed" % c3elb.name)
    sgrp = c3.aws.ec2.security_groups.SecurityGroups(
        self.conn, self.cconfig.get_primary_sg(), find_only=True)
    if sgrp.destroy():
        logging.info("Security Group %s removed" % sgrp.name)
    logging.info("Tear down complete for %s" %
                 self.cconfig.get_primary_sg())
    sys.exit(0)
def _add_tags(self, rid, tagset, tag_type=None):
    ''' Set one or more tags on a single ID '''
    # Counts individual tagging calls that failed.
    failed = 0
    # EC2 instances are tagged directly; the 'i-' prefix is a fallback
    # detector when tag_type is not given.
    if tag_type == 'ec2' or rid[:2] == 'i-':
        rid = self.conn.get_all_instances([rid])
        instance = rid[0].instances[0]
        for tname, tvalue in tagset.items():
            logging.info('For %s adding, %s: %s' % (instance, tname, tvalue))
            try:
                instance.add_tag(tname, tvalue)
            except EC2ResponseError, msg:
                logging.error(msg.message)
                failed += 1
        # Also propagate tags to EBS volumes attached to this instance.
        logging.info('Checking for attached EBS volumes')
        try:
            volumes = self.conn.get_all_volumes()
        except EC2ResponseError, msg:
            logging.error(msg.message)
            failed += 1
        # NOTE(review): this method continues beyond the visible chunk
        # (the volume-matching loop and the ebs/rds branches appear in a
        # later fragment of this file); truncated here.
def destroy(self):
    ''' Terminates instances in cluster.

    Returns the number of instances confirmed terminated (instances
    already terminated are counted as done).
    '''
    count = 0
    for iid in self.c3instances:
        if iid.get_state() not in ['terminated']:
            if iid.destroy():
                logging.info(
                    'Waiting for %s (%s) to terminate' %
                    (iid.name, iid.inst_id))
                if wait_for_instance(iid, desired_state='down',
                                     verbose=self.verbose):
                    count += 1
        else:
            # Typo fix: "teriminated" -> "terminated".
            logging.warn('%s already terminated' % iid.name)
            count += 1
    if count != len(self.c3instances):
        logging.warn(
            'Asked for %d but only %d terminated' %
            (len(self.c3instances), count))
    return count
def cluster_wake(self):
    """ Wake a hibernating cluster. """
    logging.info("Waking up %s" % self.cconfig.get_primary_sg())
    woken = self.cluster().wake()
    logging.info("%d instance(s) in %s have been started" %
                 (woken, self.cconfig.get_primary_sg()))
    logging.info("Waking up instances complete")
    # Terminal CLI action: exit cleanly.
    sys.exit(0)
def cluster_hibernate(self):
    """ Hibernates a running cluster. """
    logging.info("Hibernating %s" % self.cconfig.get_primary_sg())
    stopped = self.cluster().hibernate()
    logging.info("%d instance(s) in %s have been hibernated" %
                 (stopped, self.cconfig.get_primary_sg()))
    logging.info("Hibernating instances complete")
    # Terminal CLI action: exit cleanly.
    sys.exit(0)
def destroy_eip(self):
    ''' Destroy EIP address associated with an EC2 instance.

    Disassociates and releases the instance's EIP. Returns True on
    success, None when there is no EIP or the operation fails.
    '''
    eip = self.get_associated_eip()
    if not eip:
        return None
    logging.info('Disassociating EIP from %s' % self.name)
    try:
        eip.disassociate()
    except EC2ResponseError as msg:
        logging.error(msg.message)
        return None
    # Releasing can fail transiently right after disassociation, so try
    # up to 10 times with a short pause between attempts.
    for attempt in range(1, 11):
        try:
            eip.release()
            # Bug fix: logging.debug was passed self.verbose as a stray
            # positional arg, corrupting lazy %-formatting.
            logging.debug('EIP released on try %d' % attempt)
            return True
        except EC2ResponseError as msg:
            # Bug fix: the old code returned None on the first release
            # error, which made the retry machinery (tries += 1 and
            # time.sleep(5)) unreachable dead code. Log and retry
            # instead; give up only after the final attempt.
            logging.error(msg.message)
            time.sleep(5)
    return None
def cluster_status(self):
    """ Check the status of a cluster. """
    logging.info("Checking status for %s" % self.cconfig.get_primary_sg())
    cgc = self.cluster()
    for instance in cgc.c3instances:
        # Per-instance report fields; remain None when not applicable.
        elbm = None
        elb_hc = None
        elb_azs = None
        ebs_vols = None
        try:
            c3elb = self.elb_connection()
        except TypeError:
            # No ELB configured for this cluster.
            c3elb = None
        if c3elb:
            if c3elb.instance_configured(instance.inst_id):
                elbm = c3elb.get_dns()
                elb_hc = c3elb.get_hc()
                elb_azs = c3elb.get_azs()
        ebsm = instance.get_ebs_optimized()
        eipm = instance.get_associated_eip()
        vols = instance.get_non_root_volumes()
        if vols:
            # Render attached non-root volumes as "id: device" strings.
            ebs_vols = list()
            for key, value in vols.items():
                ebs_vols.append("%s: %s" % (str(key), str(value)))
        # NOTE(review): the exact whitespace of this report template is
        # reconstructed from a collapsed source line -- verify against
        # the original file before relying on its formatting.
        msg = """
Instance %s
ID: %s
State: %s
EBS Optimized: %s
EBS Volumes: %s
EIP: %s
ELB %s
Health Check: %s
Availability Zones: %s
""" % (
            instance.name,
            instance.inst_id,
            instance.state,
            ebsm,
            ebs_vols,
            eipm,
            elbm,
            elb_hc,
            elb_azs,
        )
        logging.info(msg)
    logging.info("Status complete")
    sys.exit(0)
    # NOTE(review): tail of set_ebs_del_on_term() -- the enclosing def
    # begins before this chunk.
    logging.info(
        "Setting delete attribute for %s on %s" % (mvolume, instance_id))
    try:
        self.conn.modify_instance_attribute(
            instance_id, 'blockDeviceMapping', mvolume)
        return True
    except EC2ResponseError, msg:
        logging.error(msg.message)
        return None

def create_snapshot(self, vol_id, desc):
    """ Creates an EBS Snapshot """
    logging.info('Creating Snapshot from %s' % vol_id)
    try:
        snap = self.conn.create_snapshot(vol_id, description=desc)
    except EC2ResponseError, msg:
        logging.error(msg.message)
        return None
    # Success: report the new snapshot id and hand it back to the caller.
    logging.info('Snapshot created %s: %s' % (snap.id, desc))
    return snap

def delete_snapshot(self, snap_id):
    ''' Delete a snapshot '''
    logging.info('Deleting snapshot: %s' % snap_id)
    try:
        # Returns the API response on success, None on EC2 error.
        return self.conn.delete_snapshot(snap_id)
    except EC2ResponseError, msg:
        logging.error(msg.message)
        return None
        # NOTE(review): tail of a deregister method -- the enclosing
        # def and its try: line begin before this chunk.
        self.elb.deregister_instances(instances)
    except BotoServerError, msg:
        logging.error(msg.message)

def set_azs(self):
    ''' Ensure AZs add to ELB from config '''
    azs = self.azs_used
    # NOTE(review): passing self.verbose as an extra positional arg to
    # logging.debug() makes logging treat it as a lazy %-format
    # argument; the pre-formatted message has no free placeholders, so
    # the record is mangled. Same on the call below.
    logging.debug("Trying to add AZs to ELB: %s" % azs, self.verbose)
    for zone in azs:
        # Only enable zones the ELB does not already cover.
        if zone not in self.elb.availability_zones:
            logging.debug("Adding %s to ELB" % azs, self.verbose)
            try:
                self.elb.enable_zones(zone)
            except BotoServerError, msg:
                logging.error(msg.message)
    logging.info('Zones configured for ELB: %s' % self.azs_used)

def set_hc(self):
    ''' Ensure HC is set for ELB '''
    # Build the health check from configured target and thresholds.
    hck = HealthCheck(
        self.conf.hc_access_point,
        self.conf.hc_interval,
        self.conf.hc_target,
        self.conf.hc_healthy_threshold,
        self.conf.hc_unhealthy_threshold)
    logging.info('Configuring HC: %s' % hck)
    try:
        self.elb.configure_health_check(hck)
    except BotoServerError, msg:
        logging.error(msg.message)
def get_cost_tags(self, rid, tag_type=None):
    ''' Return cost tags for a resource id.

    Delegates to get_cost_tags_health() and discards the health flag.
    '''
    healthy, ret = self.get_cost_tags_health(rid, tag_type=tag_type)
    # Typo fixes in the log message: "Retrived" -> "Retrieved",
    # "Healhty" -> "Healthy".
    logging.info('Retrieved tags: %s Healthy: %s' % (ret, healthy))
    return ret
            # NOTE(review): mid-method fragment of _add_tags() -- the
            # enclosing def and the ec2-branch head are outside this
            # chunk.
            logging.info('For %s adding, %s: %s' % (instance, tname, tvalue))
            try:
                instance.add_tag(tname, tvalue)
            except EC2ResponseError, msg:
                logging.error(msg.message)
                failed += 1
        # Propagate the same tags to EBS volumes attached to this
        # instance.
        logging.info('Checking for attached EBS volumes')
        try:
            volumes = self.conn.get_all_volumes()
        except EC2ResponseError, msg:
            logging.error(msg.message)
            failed += 1
        for vol in volumes:
            if vol.attach_data.instance_id == instance.id:
                logging.info('Found attached vol: %s' % vol.id)
                # Recurse to tag the volume itself.
                self._add_tags(vol.id, tagset, tag_type='ebs')
    elif tag_type == 'ebs' or rid[:4] == 'vol-':
        try:
            vol = self.conn.get_all_volumes([rid])[0]
        except EC2ResponseError, msg:
            logging.error(msg.message)
            failed += 1
        for tname, tvalue in tagset.items():
            logging.info('For %s adding, %s:%s' % (vol.id, tname, tvalue))
            try:
                vol.add_tag(tname, tvalue)
            except EC2ResponseError, msg:
                logging.error(msg.message)
                failed += 1
    elif tag_type == 'rds':
        # NOTE(review): the RDS branch body is truncated in this chunk.
def cluster_create(self):
    """ Provisions a new cluster based on a config. """
    self.conn = self.aws_conn("ec2")
    # Node database handle used for hostname allocation and instances.
    node_db = nv_connect(self.opts.nv_ini)
    success = 0
    failed = 0
    self.check_config_types()
    logging.info("Applying SG Rules to %s" % self.cconfig.get_primary_sg())
    self.sg_rules()
    if self.cconfig.get_count():
        servers = dict()
        # NOTE(review): throughout this method, self.opts.verbose is
        # passed as an extra positional arg to logging.debug(); logging
        # treats positional args as lazy %-format arguments, so these
        # debug records are mangled.
        logging.debug(
            "Creating %d %s in %s using %s." % (
                self.cconfig.get_count(), self.cconfig.get_size(),
                self.cconfig.get_azs(), self.cconfig.get_ami()),
            self.opts.verbose,
        )
        # Reserve hostnames for the new instances.
        self.hostnames = c3.utils.naming.find_available_hostnames(
            self.cconfig.get_primary_sg(), self.cconfig.get_count(),
            self.cconfig.get_aws_account(), self.cconfig.get_aws_region(),
            "ctgrd.com", node_db,
        )
        start_time = time.time()
        logging.debug("Creating new servers: %s" % self.hostnames,
                      self.opts.verbose)
        for host in self.hostnames:
            servers[host] = C3Instance(
                conn=self.conn, node_db=node_db, verbose=self.opts.verbose)
            userdata = self.cconfig.get_user_data(
                self.userdata_replacements(host))
            # With --substitute-zones each configured AZ is attempted
            # before giving up on the host; otherwise one try only.
            tries = 1
            if self.opts.substitute_zones:
                tries = len(self.cconfig.get_azs())
            while tries > 0:
                tries -= 1
                used_az = self.cconfig.get_next_az()
                logging.info("Starting %s in %s" % (host, used_az))
                instance = servers[host].start(
                    self.cconfig.get_ami(), self.cconfig.get_ssh_key(),
                    self.cconfig.get_sgs(), userdata, host,
                    self.cconfig.get_size(), used_az,
                    self.cconfig.get_node_groups(),
                    self.cconfig.get_allocate_eips(),
                    self.cconfig.get_use_ebs_optimized(),
                    self.cconfig.get_placement_group(),
                )
                if instance:
                    success += 1
                    break
                else:
                    if tries:
                        logging.warn(
                            "Failed to create %s in %s, retrying" %
                            (host, used_az))
                    else:
                        logging.error(
                            "Failed to create %s in all AZs, trying next instance"
                            % host)
                        failed += 1
            # Provision any configured EBS volumes in the AZ the host
            # actually landed in.
            if len(self.cconfig.get_ebs_config()) > 0:
                self.create_ebs(used_az, host, servers[host].get_id())
        if failed == self.cconfig.get_count():
            logging.error("%d of %d failed to create, dying" %
                          (failed, self.cconfig.get_count()))
            sys.exit(1)
        logging.info("%d of %d server(s) created" % (success,
                     self.cconfig.get_count()))
        self.wait_for_servers(servers, start_time, success)
        # Attach EBS volumes recorded by create_ebs(), then tag.
        if self.volume_instances:
            self.attach_ebs()
        self.tag_by_instance(servers)
        if self.cconfig.get_server_env() == "prd":
            self.puppet_whitelist()
        logging.info("Cluster config complete")