def delete_cluster(module, redshift):
    """
    Delete a cluster.

    module: Ansible module object
    redshift: authenticated redshift connection object

    Returns a (changed, facts) tuple; calls module.fail_json() on API error
    or timeout.
    """
    identifier = module.params.get('identifier')
    wait = module.params.get('wait')
    wait_timeout = module.params.get('wait_timeout')

    try:
        # BUG FIX: was redshift.delete_custer(...) (typo) which raised
        # AttributeError instead of issuing the DeleteCluster call.
        redshift.delete_cluster(identifier)
    except boto.exception.JSONResponseError as e:
        module.fail_json(msg=str(e))

    if wait:
        try:
            wait_timeout = time.time() + wait_timeout
            resource = redshift.describe_clusters(identifier)['DescribeClustersResponse']['DescribeClustersResult']['Clusters'][0]
            # NOTE(review): once the cluster is fully gone, describe_clusters
            # raises and we fail_json -- same behavior as the original code.
            while wait_timeout > time.time() and resource['ClusterStatus'] != 'deleting':
                time.sleep(5)
                if wait_timeout <= time.time():
                    # BUG FIX: resource is a dict, so resource.id raised
                    # AttributeError; report the cluster identifier instead.
                    module.fail_json(msg="Timeout waiting for resource %s" % identifier)
                resource = redshift.describe_clusters(identifier)['DescribeClustersResponse']['DescribeClustersResult']['Clusters'][0]
        except boto.exception.JSONResponseError as e:
            module.fail_json(msg=str(e))

    return (True, {})
def modify_cluster(module, redshift):
    """
    Modify an existing cluster.

    module: Ansible module object
    redshift: authenticated redshift connection object

    Returns a (changed, facts) tuple; calls module.fail_json() on API error
    or timeout.
    """
    identifier = module.params.get('identifier')
    wait = module.params.get('wait')
    wait_timeout = module.params.get('wait_timeout')

    # Package up the optional parameters
    params = {}
    for p in ('cluster_type', 'cluster_security_groups',
              'vpc_security_group_ids', 'cluster_subnet_group_name',
              'availability_zone', 'preferred_maintenance_window',
              'cluster_parameter_group_name',
              'automated_snapshot_retention_period', 'port',
              'cluster_version', 'allow_version_upgrade',
              'number_of_nodes', 'new_cluster_identifier',
              'enhanced_vpc_routing'):
        if p in module.params:
            params[p] = module.params.get(p)

    # BUG FIX: the original only called modify_cluster() inside the *except*
    # branch of a describe_clusters() probe -- i.e. only when the cluster did
    # NOT exist -- so an existing cluster was never modified (the probe
    # pattern was copied from create_cluster). Modify unconditionally and
    # fail on any API error.
    try:
        redshift.modify_cluster(identifier, **params)
    except boto.exception.JSONResponseError as e:
        module.fail_json(msg=str(e))

    try:
        resource = redshift.describe_clusters(identifier)['DescribeClustersResponse']['DescribeClustersResult']['Clusters'][0]
    except boto.exception.JSONResponseError as e:
        module.fail_json(msg=str(e))

    if wait:
        try:
            wait_timeout = time.time() + wait_timeout
            time.sleep(5)
            while wait_timeout > time.time() and resource['ClusterStatus'] != 'available':
                time.sleep(5)
                if wait_timeout <= time.time():
                    # BUG FIX: resource is a dict; resource.id raised
                    # AttributeError. Report the identifier instead.
                    module.fail_json(msg="Timeout waiting for resource %s" % identifier)
                resource = redshift.describe_clusters(identifier)['DescribeClustersResponse']['DescribeClustersResult']['Clusters'][0]
        except boto.exception.JSONResponseError as e:
            # https://github.com/boto/boto/issues/2776 is fixed.
            module.fail_json(msg=str(e))

    return (True, _collect_facts(resource))
def describe_cluster(module, redshift):
    """
    Collect data about the cluster.

    module: Ansible module object
    redshift: authenticated redshift connection object

    Returns a (changed, facts) tuple; calls module.fail_json() on API error.
    """
    cluster_id = module.params.get('identifier')

    try:
        response = redshift.describe_clusters(cluster_id)
        cluster = response['DescribeClustersResponse']['DescribeClustersResult']['Clusters'][0]
    except boto.exception.JSONResponseError as e:
        module.fail_json(msg=str(e))

    return (True, _collect_facts(cluster))
def create_cluster(module, redshift):
    """
    Create a new cluster.

    module: AnsibleModule object
    redshift: authenticated redshift connection object

    Returns a (changed, facts) tuple; changed is False when the cluster
    already exists. Calls module.fail_json() on API error or timeout.
    """
    identifier = module.params.get('identifier')
    node_type = module.params.get('node_type')
    username = module.params.get('username')
    password = module.params.get('password')
    wait = module.params.get('wait')
    wait_timeout = module.params.get('wait_timeout')
    changed = True

    # Package up the optional parameters
    params = {}
    for p in ('db_name', 'cluster_type', 'cluster_security_groups',
              'vpc_security_group_ids', 'cluster_subnet_group_name',
              'availability_zone', 'preferred_maintenance_window',
              'cluster_parameter_group_name',
              'automated_snapshot_retention_period', 'port',
              'cluster_version', 'allow_version_upgrade',
              'number_of_nodes', 'publicly_accessible',
              'encrypted', 'elastic_ip'):
        if p in module.params:
            params[p] = module.params.get(p)

    try:
        # Probe for an existing cluster: if describe succeeds the create
        # is a no-op and we only report facts.
        redshift.describe_clusters(identifier)['DescribeClustersResponse'][
            'DescribeClustersResult']['Clusters'][0]
        changed = False
    except boto.exception.JSONResponseError as e:
        # Cluster does not exist yet -- create it.
        try:
            redshift.create_cluster(identifier, node_type, username, password, **params)
        except boto.exception.JSONResponseError as e:
            module.fail_json(msg=str(e))

    try:
        resource = redshift.describe_clusters(identifier)['DescribeClustersResponse']['DescribeClustersResult']['Clusters'][0]
    except boto.exception.JSONResponseError as e:
        module.fail_json(msg=str(e))

    if wait:
        try:
            wait_timeout = time.time() + wait_timeout
            time.sleep(5)
            while wait_timeout > time.time() and resource['ClusterStatus'] != 'available':
                time.sleep(5)
                if wait_timeout <= time.time():
                    # BUG FIX: resource is a dict; resource.id raised
                    # AttributeError. Report the identifier instead.
                    module.fail_json(msg="Timeout waiting for resource %s" % identifier)
                resource = redshift.describe_clusters(identifier)['DescribeClustersResponse']['DescribeClustersResult']['Clusters'][0]
        except boto.exception.JSONResponseError as e:
            module.fail_json(msg=str(e))

    return (changed, _collect_facts(resource))
def check(self, instance):
    # Datadog AgentCheck entry point: collect Redshift table/query metrics
    # for one configured instance and emit an 'aws_redshift_status.up'
    # service check.
    # NOTE: source was flattened to one physical line; indentation below is
    # reconstructed -- in particular the scope of `if query:` should be
    # confirmed against the original file.
    name, cluster_name, cluster_address, cluster_port, db_name, user_name, user_password, \
        aws_access_key_id, aws_secret_access_key, aws_region, query, \
        tags = self._load_conf(instance)

    start = time.time()

    service_check_tags = [
        'name:%s' % name
    ]
    # When no explicit endpoint is configured we tag by cluster name and
    # discover the endpoint via the AWS API below.
    if cluster_address is None and cluster_port is None:
        service_check_tags.append('cluster_name:%s' % cluster_name)
    else:
        service_check_tags.append('cluster_address:%s' % cluster_address)
        service_check_tags.append('cluster_port:%s' % cluster_port)

    conn = None
    try:
        if cluster_address is None and cluster_port is None:
            # Resolve the cluster endpoint (address/port) from AWS.
            redshift = boto.redshift.connect_to_region(aws_region,
                                                       aws_access_key_id=aws_access_key_id,
                                                       aws_secret_access_key=aws_secret_access_key)
            clusters = redshift.describe_clusters(cluster_name)
            if len(clusters) == 0:
                raise Exception('Cluster is empty')
            cluster = clusters['DescribeClustersResponse']['DescribeClustersResult']['Clusters'][0]
            endpoint = cluster['Endpoint']
            cluster_address = endpoint['Address']
            cluster_port = endpoint['Port']

        connect_timeout = self.init_config.get('connect_timeout', 3)
        # NOTE(review): conn is never closed in this method (the script
        # variant of check() does close it in a finally) -- consider adding
        # a finally: conn.close().
        conn = psycopg2.connect(
            host=cluster_address,
            port=cluster_port,
            database=db_name,
            user=user_name,
            password=user_password,
            connect_timeout=connect_timeout,
        )

        # Instance setting wins over init_config, then the class default.
        min_collection_interval = instance.get('min_collection_interval',
                                               self.init_config.get(
                                                   'min_collection_interval',
                                                   self.DEFAULT_MIN_COLLECTION_INTERVAL
                                               )
                                               )

        # Window [starttime, endtime] used to filter the query log below.
        today = datetime.datetime.utcnow()
        starttime = (today - datetime.timedelta(seconds=min_collection_interval)).strftime('%Y-%m-%d %H:%M:%S.%f')
        endtime = today.strftime('%Y-%m-%d %H:%M:%S.%f')

        if query:
            results = self._db_query(conn, QUERY_TABLE_COUNT)
            # NOTE(review): metric name uses 'aws.redshift_status' (dot)
            # while every other metric uses 'aws_redshift_status'
            # (underscore) -- likely unintentional, but renaming would
            # change the emitted metric.
            self.gauge('aws.redshift_status.table_count', results[0][0], tags=tags)

            # Per-node slice count.
            results = self._db_query(conn, QUERY_NODE)
            for row in results:
                gauge_tags = tags[:]
                gauge_tags.append('node:%d' % row[0])
                self.gauge('aws_redshift_status.node_slice', row[1], tags=gauge_tags)

            # Per-table gauge.
            results = self._db_query(conn, QUERY_TABLE)
            for row in results:
                gauge_tags = tags[:]
                gauge_tags.append('table:%s' % row[0])
                self.gauge('aws_redshift_status.table', row[1], tags=gauge_tags)

            # Per-table size / row-count / skew metrics.
            results = self._db_query(conn, QUERY_TABLE_STATUS)
            for row in results:
                gauge_tags = tags[:]
                gauge_tags.append('table:%s' % row[0])
                self.gauge('aws_redshift_status.table_status.size', row[1], tags=gauge_tags)
                self.gauge('aws_redshift_status.table_status.tbl_rows', row[2], tags=gauge_tags)
                self.gauge('aws_redshift_status.table_status.skew_rows', row[3], tags=gauge_tags)

            # Count of each statement type seen in the log window.
            for q in [ 'select', 'insert', 'update', 'delete', 'analyze' ]:
                results = self._db_query(conn, QUERY_LOG_TYPE % (starttime, endtime, '%s %%' % q))
                for row in results:
                    self.gauge('aws_redshift_status.query.%s' % q, row[0], tags=tags)

        running_time = time.time() - start
        self.gauge('aws_redshift_status.response_time', running_time, tags=tags)

        self.service_check(
            'aws_redshift_status.up',
            AgentCheck.OK,
            tags=service_check_tags,
        )
    except Exception, e:  # Python 2 syntax; file predates py3.
        self.warning(e)
        # NOTE(review): the OK path tags the service check with
        # service_check_tags but this WARNING path uses `tags` -- probably
        # meant to be service_check_tags; confirm before changing.
        self.service_check(
            'aws_redshift_status.up',
            AgentCheck.WARNING,
            tags=tags,
            message='Exception - %s' % (e)
        )
def create_cluster(module, redshift):
    """
    Create a new cluster.

    module: AnsibleModule object
    redshift: authenticated redshift connection object

    Returns a (changed, facts) tuple; changed is False when the cluster
    already exists. Calls module.fail_json() on API error or timeout.
    """
    identifier = module.params.get('identifier')
    node_type = module.params.get('node_type')
    username = module.params.get('username')
    password = module.params.get('password')
    wait = module.params.get('wait')
    wait_timeout = module.params.get('wait_timeout')
    changed = True

    # Package up the optional parameters
    params = {}
    for p in ('db_name', 'cluster_type', 'cluster_security_groups',
              'vpc_security_group_ids', 'cluster_subnet_group_name',
              'availability_zone', 'preferred_maintenance_window',
              'cluster_parameter_group_name',
              'automated_snapshot_retention_period', 'port',
              'cluster_version', 'allow_version_upgrade',
              'number_of_nodes', 'publicly_accessible',
              'encrypted', 'elastic_ip', 'enhanced_vpc_routing'):
        if p in module.params:
            params[p] = module.params.get(p)

    try:
        # Probe for an existing cluster: if describe succeeds the create
        # is a no-op and we only report facts.
        redshift.describe_clusters(identifier)['DescribeClustersResponse']['DescribeClustersResult']['Clusters'][0]
        changed = False
    except boto.exception.JSONResponseError as e:
        # Cluster does not exist yet -- create it.
        try:
            redshift.create_cluster(identifier, node_type, username, password, **params)
        except boto.exception.JSONResponseError as e:
            module.fail_json(msg=str(e))

    try:
        resource = redshift.describe_clusters(identifier)['DescribeClustersResponse']['DescribeClustersResult']['Clusters'][0]
    except boto.exception.JSONResponseError as e:
        module.fail_json(msg=str(e))

    if wait:
        try:
            wait_timeout = time.time() + wait_timeout
            time.sleep(5)
            while wait_timeout > time.time() and resource['ClusterStatus'] != 'available':
                time.sleep(5)
                if wait_timeout <= time.time():
                    # BUG FIX: resource is a dict; resource.id raised
                    # AttributeError. Report the identifier instead.
                    module.fail_json(msg="Timeout waiting for resource %s" % identifier)
                resource = redshift.describe_clusters(identifier)['DescribeClustersResponse']['DescribeClustersResult']['Clusters'][0]
        except boto.exception.JSONResponseError as e:
            module.fail_json(msg=str(e))

    return (changed, _collect_facts(resource))
def check(self):
    # Standalone (script-style) variant of the Redshift status check: reads
    # the agent YAML config directly and reports metrics through dogstatsd
    # ThreadStats instead of the AgentCheck gauge API.
    # NOTE: source was flattened to one physical line; indentation below is
    # reconstructed -- placement of stats.flush()/stats.stop() after the
    # instance loop should be confirmed against the original file.
    logging.info('check info')
    try:
        # DATADOG_CONF overrides the default conf.d location of the config.
        yaml_file = os.environ.get('DATADOG_CONF', '%s/aws_redshift_status.yaml' % config.get_confd_path())
        # NOTE(review): py2 file() + yaml.load without Loader= -- unsafe on
        # untrusted YAML; fine only because this reads the local agent conf.
        yaml_data = yaml.load(file(yaml_file))
        init_config = yaml_data['init_config']
        interval = init_config.get('min_collection_interval', 300)

        stats = ThreadStats()
        stats.start(flush_interval=10, roll_up_interval=1, device=None,
                    flush_in_thread=False, flush_in_greenlet=False, disabled=False)

        start = time.time()
        for instance in yaml_data['instances']:
            logging.debug('instance name is %s' % instance['name'])
            # NOTE(review): `name` and `query` are unpacked but unused in
            # this variant (the AgentCheck variant uses both).
            name, cluster_name, cluster_address, cluster_port, db_name, user_name, user_password, \
                aws_access_key_id, aws_secret_access_key, aws_region, query, \
                tags = self._load_conf(instance)

            if cluster_address is None and cluster_port is None:
                # Resolve the cluster endpoint (address/port) from AWS.
                redshift = boto.redshift.connect_to_region(aws_region,
                                                           aws_access_key_id=aws_access_key_id,
                                                           aws_secret_access_key=aws_secret_access_key)
                clusters = redshift.describe_clusters(cluster_name)
                if len(clusters) == 0:
                    raise Exception('Cluster is empty')
                cluster = clusters['DescribeClustersResponse']['DescribeClustersResult']['Clusters'][0]
                endpoint = cluster['Endpoint']
                cluster_address = endpoint['Address']
                cluster_port = endpoint['Port']

            conn = None
            try:
                connect_timeout = init_config.get('connect_timeout', 5)
                conn = psycopg2.connect(
                    host=cluster_address,
                    port=cluster_port,
                    database=db_name,
                    user=user_name,
                    password=user_password,
                    connect_timeout=connect_timeout,
                )

                # Window [starttime, endtime] used to filter the query log.
                today = datetime.datetime.utcnow()
                starttime = (today - datetime.timedelta(seconds=interval)).strftime('%Y-%m-%d %H:%M:%S.%f')
                endtime = today.strftime('%Y-%m-%d %H:%M:%S.%f')

                results = self._db_query(conn, QUERY_TABLE_COUNT)
                # NOTE(review): dotted 'aws.redshift_status' here vs
                # underscored 'aws_redshift_status' everywhere else --
                # likely unintentional but renaming changes the metric.
                stats.gauge('aws.redshift_status.table_count', results[0][0], tags=tags)
                logging.debug('aws.redshift_status.table_count is %s' % results[0][0])

                # Per-node slice count.
                results = self._db_query(conn, QUERY_NODE)
                for row in results:
                    gauge_tags = tags[:]
                    gauge_tags.append('node:%s' % row[0])
                    stats.gauge('aws_redshift_status.node_slice', row[1], tags=gauge_tags)
                    logging.debug('aws_redshift_status.node_slice is %s' % row[1])

                # Per-table record counts.
                results = self._db_query(conn, QUERY_TABLE_RECORD)
                for row in results:
                    gauge_tags = tags[:]
                    gauge_tags.append('table:%s' % row[0])
                    stats.gauge('aws_redshift_status.table_records', row[1], tags=gauge_tags)
                    logging.debug('aws_redshift_status.table_records is %s' % row[1])

                # Per-table size / row-count / skew metrics.
                results = self._db_query(conn, QUERY_TABLE_STATUS)
                for row in results:
                    gauge_tags = tags[:]
                    gauge_tags.append('table:%s' % row[0])
                    stats.gauge('aws_redshift_status.table_status.size', row[1], tags=gauge_tags)
                    logging.debug('aws_redshift_status.table_status.size is %s' % row[1])
                    stats.gauge('aws_redshift_status.table_status.tbl_rows', row[2], tags=gauge_tags)
                    logging.debug('aws_redshift_status.table_status.tbl_rows is %s' % row[2])
                    stats.gauge('aws_redshift_status.table_status.skew_rows', row[3], tags=gauge_tags)
                    logging.debug('aws_redshift_status.table_status.skew_rows is %s' % row[3])

                # Count of each statement type seen in the log window.
                for q in [ 'select', 'insert', 'update', 'delete', 'analyze' ]:
                    results = self._db_query(conn, QUERY_LOG_TYPE % (starttime, endtime, '%s %%' % q))
                    for row in results:
                        stats.gauge('aws_redshift_status.query.%s' % q, row[0], tags=tags)
                        logging.debug('aws_redshift_status.query.%s is %s' % (q, row[0]))

                running_time = time.time() - start
                stats.gauge('aws_redshift_status.response_time', running_time, tags=tags)
                logging.debug('aws_redshift_status.response_time is %s' % running_time)
            finally:
                # Always release the DB connection, even on query failure.
                if conn:
                    conn.close()

        # Push buffered metrics and stop the ThreadStats reporter.
        stats.flush()
        stop = stats.stop()
        logging.debug('Stopping is %s' % stop)
    except Exception:
        # Broad catch: log and swallow so a single bad run does not kill
        # the caller (script/cron context).
        logging.warning(sys.exc_info())