from cm_api.api_client import ApiResource


class TimeSeriesQuery(object):
    """Thin wrapper around the Cloudera Manager time-series endpoint.

    Assumes CM_HOST, CM_USER, CM_PASSWD and CM_USE_TLS are defined elsewhere.
    """

    def __init__(self):
        self._api = ApiResource(CM_HOST, username=CM_USER, password=CM_PASSWD,
                                use_tls=CM_USE_TLS)

    def query(self, query, from_time, to_time):
        return self._api.query_timeseries(query, from_time, to_time)
from cm_api.api_client import ApiResource


class TimeSeriesQuery(object):
    """Same wrapper, using the ApiResource defaults (no TLS options)."""

    def __init__(self):
        self._api = ApiResource(CM_HOST, username=CM_USER, password=CM_PASSWD)

    def query(self, query, from_time, to_time):
        return self._api.query_timeseries(query, from_time, to_time)
from cm_api.api_client import ApiResource
from cm_api.endpoints import timeseries


class TimeSeriesQuery(object):
    def __init__(self):
        self._api = ApiResource(CM_HOST, username=CM_USER, password=CM_PASSWD,
                                use_tls=False, version=VERSION)

    def query(self, query, from_time, to_time):
        return self._api.query_timeseries(query, from_time, to_time)

    def query_rollup(self, query, from_time, to_time, desired_rollup=None,
                     must_use_desired_rollup=None):
        # The module-level helper accepts the rollup arguments that
        # ApiResource.query_timeseries does not expose directly.
        return timeseries.query_timeseries(self._api, query, from_time,
                                           to_time, desired_rollup,
                                           must_use_desired_rollup)
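# A minimal usage sketch (assuming CM_HOST, CM_USER, CM_PASSWD and VERSION are
# defined as in the class above): fetch the last hour of an HDFS metric,
# asking Cloudera Manager for hourly rollups.
import datetime

tsq = TimeSeriesQuery()
to_time = datetime.datetime.now()
from_time = to_time - datetime.timedelta(hours=1)
tsquery = "SELECT files_total WHERE serviceName = HDFS AND category = SERVICE"
for response in tsq.query_rollup(tsquery, from_time, to_time,
                                 desired_rollup='HOURLY',
                                 must_use_desired_rollup=True):
    for ts in response.timeSeries:
        for point in ts.data:
            print "%s\t%s" % (point.timestamp, point.value)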
import datetime
import json
import urllib

from cm_api.api_client import ApiResource, ApiException
from cm_api.endpoints.dashboards import ApiDashboard, create_dashboards
from tabulate import tabulate

# MAN_API_VERSION, compress_bins and update_metadata are project-specific
# (a hypothetical compress_bins sketch follows this example).


def do_call(user, password, man_host, man_port, nav_host, nav_port, app_name,
            app_version, app_namespace, app_time, app_start, app_end,
            app_dashboard, app_report_only):
    cpu = 0
    hdfs = 0
    network = 0
    if app_report_only:
        app_start = '0'
        app_end = '0'
    dashboard_name = 'Release (' + app_namespace + ')'
    if not app_report_only:
        api = ApiResource(man_host, man_port, user, password, False,
                          MAN_API_VERSION)
        with open(app_dashboard, 'r') as dashboard_data_file:
            dashboard_data = dashboard_data_file.read()
        try:
            create_dashboards(api, [ApiDashboard(api, dashboard_name,
                                                 dashboard_data)])
        except ApiException:
            pass
        for view_plot in json.loads(dashboard_data)['viewPlots']:
            for key, value in view_plot['plot'].items():
                if key == 'tsquery':
                    for time_series in api.query_timeseries(
                            value,
                            datetime.datetime.fromtimestamp(float(app_start)),
                            datetime.datetime.fromtimestamp(float(app_end)))[0].timeSeries:
                        if time_series.metadata.metricName == 'cpu_percent_across_hosts':
                            cpu = compress_bins(time_series.data, 1)
                        if time_series.metadata.metricName == 'total_bytes_read_rate_across_datanodes':
                            hdfs += compress_bins(time_series.data, 100000)
                        if time_series.metadata.metricName == 'total_bytes_written_rate_across_datanodes':
                            hdfs += compress_bins(time_series.data, 100000)
                        if time_series.metadata.metricName == 'total_bytes_receive_rate_across_network_interfaces':
                            network += compress_bins(time_series.data, 100000)
                        if time_series.metadata.metricName == 'total_bytes_transmit_rate_across_network_interfaces':
                            network += compress_bins(time_series.data, 100000)
    properties = [
        {'name': 'Name', 'description': 'Application name', 'value': {'Name': [app_name]}},
        {'name': 'Version', 'description': 'Application version', 'value': {'Version': [app_version]}},
        {'name': 'Run', 'description': 'Run time', 'value': {'Run': [app_time]}},
        {'name': 'Start', 'description': 'Start time', 'value': {'Start': [app_start + '000']}},
        {'name': 'Finish', 'description': 'Finish time', 'value': {'Finish': [app_end + '000']}},
        {'name': 'CPU', 'description': 'Relative CPU usage during benchmark', 'value': {'CPU': [str(cpu)]}},
        {'name': 'HDFS', 'description': 'Relative HDFS usage during benchmark', 'value': {'HDFS': [str(hdfs)]}},
        {'name': 'Network', 'description': 'Relative Network usage during benchmark', 'value': {'Network': [str(network)]}},
    ]
    app_properties = update_metadata(user, password, nav_host, nav_port,
                                     app_namespace, 'Benchmark', properties,
                                     app_report_only)
    app_table_comparison = '{:<15} |{:>15} |{:>15} |{:>15} |{:>15} |{:>15} |{:>15}|'
    app_table = [['Application', app_name + '-' + app_version]]
    if not app_report_only:
        app_table.append(['Run', app_time + 's (' + str(int(app_time) / 60) + 'm)'])
        app_table.append(['Start',
                          datetime.datetime.fromtimestamp(float(app_start)).strftime('%Y-%m-%d %H:%M:%S') +
                          ' (' + app_start + '000)'])
        app_table.append(['Finish',
                          datetime.datetime.fromtimestamp(float(app_end)).strftime('%Y-%m-%d %H:%M:%S') +
                          ' (' + app_end + '000)'])
    if app_properties['database']:
        app_table.append(['Metadata',
                          'http://localhost:7187/?view=detailsView&id=' + app_properties['database']])
    app_dashboard_uri = 'http://localhost:7180/cmf/views/view?viewName=' + urllib.quote_plus(dashboard_name)
    if app_report_only:
        app_table.append(['Dashboard', app_dashboard_uri])
    else:
        app_table.append(['Dashboard',
                          app_dashboard_uri + '#startTime=' + app_start + '000&endTime=' + app_end + '000'])
    app_table.append(['Comparison',
                      app_table_comparison.format('Version', 'Start', 'Finish',
                                                  'Run', 'CPU', 'HDFS', 'Network')])
    for properties_value in app_properties['properties']:
        app_table.append([None,
                          app_table_comparison.format(
                              ', '.join(properties_value['Version']),
                              ', '.join(properties_value['Start']),
                              ', '.join(properties_value['Finish']),
                              ', '.join(properties_value['Run']),
                              ', '.join(properties_value['CPU']),
                              ', '.join(properties_value['HDFS']),
                              ', '.join(properties_value['Network']))])
    print tabulate(app_table, tablefmt='grid')
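# compress_bins and update_metadata are project helpers that are not shown
# here. A hypothetical stand-in for compress_bins, assuming it reduces a list
# of time-series points to a single scaled integer, might look like:
def compress_bins(points, divisor):
    # Sum the point values and scale down so that metrics with different
    # absolute magnitudes land on roughly comparable axes.
    return int(sum(point.value for point in points) / divisor)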
# for m in metrics:
#     print "%s (%s)" % (m.name, m.unit)

# Query chart (time-series) information for the last three minutes.
# Assumes `api` is an ApiResource created earlier in the script.
import time
import datetime

from_time = datetime.datetime.fromtimestamp(time.time() - 180)
to_time = datetime.datetime.fromtimestamp(time.time())

query = "select files_total, dfs_capacity_used " \
        "where serviceName = HDFS " \
        "  and category = SERVICE"
query1 = ('select swap_used, physical_memory_used, physical_memory_total, '
          'physical_memory_cached, physical_memory_buffers '
          'where entityName="bd8a6ef9-808a-49db-822a-4ce8146ad315"')

result = api.query_timeseries(query1, from_time, to_time)
ts_list = result[0]
for ts in ts_list.timeSeries:
    print "--- %s: %s ---" % (ts.metadata.entityName, ts.metadata.metricName)
    for point in ts.data:
        print "%s:\t%s" % (point.timestamp.isoformat(), point.value)
print("=============================================")

# for h in api.get_all_hosts():
#     for i in h.get_metrics():
#         print(i.context)
#     # print(h.healthSummary)
#     print(dir(h))
#     print(dir(i))
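# The snippet above assumes an `api` handle. A minimal sketch of building one,
# with placeholder host and credentials:
from cm_api.api_client import ApiResource

api = ApiResource('cm-host.example.com', username='admin', password='admin',
                  version=11)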
import psycopg2
from urlparse import urlparse

from cm_api.api_client import ApiResource
# ClustersApi comes from the Cloudera Director client SDK.


def runQuery(client, environmentName, deploymentName, clusterName, fromTime,
             toTime):
    cluster = ClustersApi(client).get(environmentName, deploymentName,
                                      clusterName)
    if not cluster:
        return
    # print("Cloudera Manager URL [%s]" % cluster.url)
    cluster_health = cluster.health.status
    cmUrl = urlparse(cluster.url)
    cm_host = cmUrl.hostname
    api = ApiResource(cm_host, username="******", password="******")
    if cluster_health == 'NOT_AVAILABLE':
        return
    conn = psycopg2.connect(
        "host=techops-meta-enc.c8ibwewzhjlc.us-east-1.rds.amazonaws.com "
        "dbname=spotfire user=spotfirerpt password=spotfire123")
    cur = conn.cursor()

    ################################ Run Impala query ################################
    impalaQuery = ("SELECT total_num_queries_rate_across_impalads "
                   "WHERE entityName RLIKE '.*CD-IMPALA.*' AND category = SERVICE")
    result = api.query_timeseries(impalaQuery, fromTime, toTime)
    ts_list = result[0]
    # Insert every point into the database
    for ts in ts_list.timeSeries:
        for point in ts.data:
            cur.execute(
                "INSERT INTO impala_usage_history (cluster_name, timestamp, average_queries) VALUES (%s, %s, %s)",
                (clusterName, point.timestamp, point.value))

    ################################ Run YARN query ##################################
    yarnQuery = ("SELECT apps_running_cumulative "
                 "WHERE entityName RLIKE '.*root*' AND category = YARN_POOL")
    result = api.query_timeseries(yarnQuery, fromTime, toTime)
    ts_list = result[0]
    # Insert every point into the database
    for ts in ts_list.timeSeries:
        for point in ts.data:
            cur.execute(
                "INSERT INTO yarn_usage_history (cluster_name, timestamp, average_app) VALUES (%s, %s, %s)",
                (clusterName, point.timestamp, point.value))

    ################################ Run HDFS query ##################################
    dfs_capacity_query = ("SELECT dfs_capacity/(1024*1024) "
                          "WHERE entityName RLIKE '.*HDFS.*' AND category = SERVICE")
    result = api.query_timeseries(dfs_capacity_query, fromTime, toTime)
    ts_list = result[0]
    dfs_capacity = {}
    # Collect every point into a dictionary keyed by timestamp
    for ts in ts_list.timeSeries:
        for point in ts.data:
            dfs_capacity[point.timestamp] = point.value

    dfs_capacity_used_query = ("SELECT dfs_capacity_used/(1024*1024) "
                               "WHERE entityName RLIKE '.*HDFS.*' AND category = SERVICE")
    result = api.query_timeseries(dfs_capacity_used_query, fromTime, toTime)
    ts_list = result[0]
    dfs_capacity_used = {}
    for ts in ts_list.timeSeries:
        for point in ts.data:
            dfs_capacity_used[point.timestamp] = point.value

    dfs_capacity_used_non_hdfs_query = ("SELECT dfs_capacity_used_non_hdfs/(1024*1024) "
                                        "WHERE entityName RLIKE '.*HDFS.*' AND category = SERVICE")
    result = api.query_timeseries(dfs_capacity_used_non_hdfs_query, fromTime, toTime)
    ts_list = result[0]
    dfs_capacity_used_non_hdfs = {}
    for ts in ts_list.timeSeries:
        for point in ts.data:
            dfs_capacity_used_non_hdfs[point.timestamp] = point.value

    # Join the three series on timestamp and insert into the database
    for point in dfs_capacity:
        cur.execute(
            "INSERT INTO hdfs_usage_history (cluster_name, timestamp, dfs_capacity, dfs_capacity_used, dfs_capacity_used_non_hdfs) VALUES (%s, %s, %s, %s, %s)",
            (clusterName, point, float(dfs_capacity[point]),
             float(dfs_capacity_used[point]),
             float(dfs_capacity_used_non_hdfs[point])))

    ################################ Run CPU query ###################################
    cpuquery = "SELECT cpu_percent_across_hosts WHERE entityName = '1' AND category = CLUSTER"
    result = api.query_timeseries(cpuquery, fromTime, toTime)
    ts_list = result[0]
    # Insert every point into the database
    for ts in ts_list.timeSeries:
        for point in ts.data:
            cur.execute(
                "INSERT INTO cpu_usage_history (cluster_name, timestamp, cpu_percent_across_hosts) VALUES (%s, %s, %s)",
                (clusterName, point.timestamp, point.value))

    ################################ Run Network I/O query ###########################
    tbreceived_query = "SELECT total_bytes_receive_rate_across_network_interfaces where category = CLUSTER"
    result = api.query_timeseries(tbreceived_query, fromTime, toTime)
    ts_list = result[0]
    tbreceived = {}
    for ts in ts_list.timeSeries:
        for point in ts.data:
            tbreceived[point.timestamp] = point.value

    tbtransmit_query = "SELECT total_bytes_transmit_rate_across_network_interfaces where category = CLUSTER"
    result = api.query_timeseries(tbtransmit_query, fromTime, toTime)
    ts_list = result[0]
    tbtransmit = {}
    for ts in ts_list.timeSeries:
        for point in ts.data:
            tbtransmit[point.timestamp] = point.value

    # Join on timestamp and insert into the database
    for point in tbreceived:
        # print tbreceived[point]
        # print float(tbreceived[point])
        cur.execute(
            "INSERT INTO network_usage_history (cluster_name, timestamp, total_bytes_receive_rate_across_network_interfaces, total_bytes_transmit_rate_across_network_interfaces) VALUES (%s, %s, %s, %s)",
            (clusterName, point, tbreceived[point], tbtransmit[point]))

    ################################ Run HDFS I/O query ##############################
    tbreadrate_query = "select total_bytes_read_rate_across_datanodes where category = SERVICE and serviceType = HDFS"
    result = api.query_timeseries(tbreadrate_query, fromTime, toTime)
    ts_list = result[0]
    tbreadrate = {}
    for ts in ts_list.timeSeries:
        for point in ts.data:
            tbreadrate[point.timestamp] = point.value

    tbwrittenrate_query = "select total_bytes_written_rate_across_datanodes where category = SERVICE and serviceType = HDFS"
    result = api.query_timeseries(tbwrittenrate_query, fromTime, toTime)
    ts_list = result[0]
    tbwrittenrate = {}
    for ts in ts_list.timeSeries:
        for point in ts.data:
            tbwrittenrate[point.timestamp] = point.value

    # Join on timestamp and insert into the database
    for point in tbreadrate:
        cur.execute(
            "INSERT INTO hdfsio_usage_history (cluster_name, timestamp, total_bytes_read_rate_across_datanodes, total_bytes_written_rate_across_datanodes) VALUES (%s, %s, %s, %s)",
            (clusterName, point, tbreadrate[point], tbwrittenrate[point]))

    ################################ Run Memory query ################################
    memoryused_query = "select physical_memory_used WHERE category = HOST"
    result = api.query_timeseries(memoryused_query, fromTime, toTime)
    ts_list = result[0]
    memoryused = {}
    for ts in ts_list.timeSeries:
        for point in ts.data:
            memoryused[point.timestamp] = point.value

    memorytotal_query = "select physical_memory_total WHERE category = HOST"
    result = api.query_timeseries(memorytotal_query, fromTime, toTime)
    ts_list = result[0]
    memorytotal = {}
    for ts in ts_list.timeSeries:
        for point in ts.data:
            memorytotal[point.timestamp] = point.value

    # Join on timestamp and insert into the database
    for point in memoryused:
        cur.execute(
            "INSERT INTO memory_usage_history (cluster_name, timestamp, physical_memory_used, physical_memory_total) VALUES (%s, %s, %s, %s)",
            (clusterName, point, memoryused[point], memorytotal[point]))

    # Commit and close connections
    conn.commit()
    cur.close()
    conn.close()
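# A hedged invocation sketch: `client` is assumed to be an already
# authenticated Cloudera Director API client, the window covers the last hour,
# and the environment/deployment/cluster names are placeholders.
import datetime

toTime = datetime.datetime.utcnow()
fromTime = toTime - datetime.timedelta(hours=1)
runQuery(client, 'prod-env', 'prod-deployment', 'prod-cluster', fromTime,
         toTime)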
import time
from datetime import datetime, timedelta

# Assumes `api` is an ApiResource created earlier in the script.

# Get a list of all clusters
cdh = None
for c in api.get_all_clusters():
    print c.name

# Getting metrics: time-series information for the last 12 hours
# print time.time()
from_time = datetime.fromtimestamp(time.time() - 3600 * 12)
to_time = datetime.fromtimestamp(time.time())
print datetime.now()

# total_physical_memory_used monitoring data
f_memory = open("./data/total_physical_memory_used.out", 'a+')
query_memory = "SELECT total_physical_memory_used_across_hosts WHERE entityName = \"1\" AND category = CLUSTER"
result1 = api.query_timeseries(query_memory, from_time, to_time)
ts_list1 = result1[0]
for ts1 in ts_list1.timeSeries:
    print "--- %s: %s ---" % (ts1.metadata.entityName, ts1.metadata.metricName)
    for point1 in ts1.data:
        dt1 = point1.timestamp + timedelta(hours=8)  # shift UTC to local time (UTC+8)
        # Convert bytes to TiB before writing
        print >> f_memory, "%s\t%s" % (dt1, point1.value / 1024 / 1024 / 1024 / 1024)
f_memory.close()
print "Memory monitoring data in the last 12 hours written"

# cpu_percent_used monitoring data
f_cpu = open("./data/cpu_percent_used.out", 'a+')
query_cpu = "select cpu_percent_across_hosts where category = CLUSTER"
result2 = api.query_timeseries(query_cpu, from_time, to_time)
ts_list2 = result2[0]
import sys
from copy import deepcopy

import arrow
import boto.ec2
import boto.ec2.cloudwatch

import common  # project-specific helpers (otsdb_send, ts_from_aws, ...)
from cm_api.api_client import ApiResource


def main():
    global ec2con
    global cwcon
    ec2con = boto.ec2.connect_to_region('us-east-1')
    cwcon = boto.ec2.cloudwatch.CloudWatchConnection()
    api = ApiResource(CM_HOST, username="******", password="******")
    displayName = None
    for c in api.get_all_clusters():
        displayName = c.displayName
        print "Cluster: %s (%s)" % (displayName, c.name)
        inst_cache = {}
        insts = api.get_all_hosts('full')
        print "Found %s in the cluster" % [inst.hostId for inst in insts.objects]
        for inst in insts.objects:
            clusterName = inst.roleRefs[0].clusterName
            if clusterName != c.name:
                print 'Clusters do not correspond: %s vs %s' % (clusterName,
                                                                c.name)
                continue
            cores = inst.numCores
            inst_id = inst.hostId
            inst_cache[inst_id] = my_cache = {}
            # For later - we'll send in one data point for every TS query
            # that has AWS data
            my_cache['aws_info_recorded'] = False
            # my_cache['healthSummary'] = inst.healthSummary
            ress = ec2con.get_all_reservations(filters={'instance-id': inst_id})
            if len(ress) > 0:
                print "Found %s reservations for %s: %s" % (len(ress), inst_id,
                                                            ress)
            res = ress[0]
            instances = res.instances
            if len(instances) > 1:
                print "Found %s instances for %s %s" % (len(instances),
                                                        inst_id, instances)
            inst = instances[0]
            if inst.id != inst_id:
                raise Exception("%s != %s" % (inst.id, inst_id))
            platform = inst.platform
            vpc_id = inst.vpc_id
            if platform == 'windows':
                product = 'Windows'
            elif not platform:
                product = 'Linux_UNIX'
            else:
                product = 'UNKNOWN'
            if vpc_id:
                product += "_Amazon_VPC"
            ami = inst.image_id
            my_cache['product'] = product
            my_cache['region'] = inst.region.name
            my_cache['zone'] = inst.placement
            inst_type = inst.instance_type.replace('.', '_')
            my_cache['inst_type'] = inst_type

            time_f = arrow.utcnow().replace(minutes=common.DEFAULT_LOOKBACK_MINUTES)
            time_t = arrow.utcnow()
            # TODO
            # http://arr.gr/blog/2013/08/monitoring-ec2-instance-memory-usage-with-cloudwatch/
            # http://blog.sciencelogic.com/netflix-steals-time-in-the-cloud-and-from-users/03/2011
            # https://www.stackdriver.com/cpu-steal-why-aws-cloudwatch-metrics-are-different-than-agent-metrics/
            stat = cwcon.get_metric_statistics(300, time_f, time_t,
                                               'CPUUtilization', 'AWS/EC2',
                                               ['Average', 'Minimum', 'Maximum'],
                                               {'InstanceId': inst_id})
            # [{u'Timestamp': datetime.datetime(2014, 4, 13, 6, 5),
            #   u'Average': 0.35250000000000004, u'Minimum': 0.33,
            #   u'Maximum': 0.42, u'Unit': u'Percent'}]
            print 'Fetching stats for %s: %s' % (inst_id, stat)
            if stat:
                for s in stat:
                    ts = common.ts_from_aws(s)
                    my_cache['avg_cpu'] = float(s['Average'])
                    my_cache['ts'] = ts  # remembered for the once-only AWS point below
            else:
                print "No stats found for %s" % inst_id

        print "Querying CDH."
        series = api.query_timeseries('SELECT * WHERE clusterName = %s' % c.name)
        for entry in series.objects[0].timeSeries:
            # print entry.metadata.__dict__
            metric = entry.metadata.metricName
            # internal hostname
            hostname = ""
            if 'hostname' in entry.metadata.attributes:
                hostname = entry.metadata.attributes['hostname']
            inst_id = ""
            my_cache = {}
            if 'hostId' in entry.metadata.attributes:
                inst_id = entry.metadata.attributes['hostId']
                if inst_id not in inst_cache:
                    print "Cannot find %s in %s" % (inst_id, inst_cache)
                else:
                    my_cache = inst_cache[inst_id]
            service_name = ""
            if 'serviceName' in entry.metadata.attributes:
                service_name = entry.metadata.attributes['serviceName']
            service_type = ""
            if 'serviceType' in entry.metadata.attributes:
                service_type = entry.metadata.attributes['serviceType']
            role_type = ""
            if 'roleType' in entry.metadata.attributes:
                role_type = entry.metadata.attributes['roleType']
            num = entry.metadata.unitNumerators
            denom = entry.metadata.unitDenominators
            if len(num) > 1:
                print "Num: %s" % num
            if len(denom) > 1:
                print "Denom: %s" % denom
            unit = num[0]
            if len(denom) > 0:
                unit += denom[0]
            tags = {
                'cdh_service_name_service_type_role_type':
                    "%s.%s.%s" % (service_name, service_type, role_type),
                'unit': unit,
            }
            combined_tags = deepcopy(tags)
            if my_cache:
                # combined_tags['healthSummary'] = my_cache['healthSummary']
                combined_tags['inst_type'] = my_cache['inst_type']
                combined_tags['cloud'] = 'aws'
                combined_tags['region'] = my_cache['region']
                combined_tags['zone'] = my_cache['zone']
                combined_tags['product'] = my_cache['product']
            if not entry.data:
                continue
            for sample in entry.data:
                ts = arrow.Arrow.fromdatetime(sample.timestamp).timestamp
                val = sample.value
                if len(combined_tags) > 8:
                    print "ERROR: Too many tags: %s" % combined_tags
                    sys.exit(0)
                common.otsdb_send(metric, val, combined_tags, ts, False)
            # Record the AWS data point once only
            if my_cache and not my_cache['aws_info_recorded']:
                # print my_cache
                combined_tags['unit'] = 'percent'
                if 'avg_cpu' in my_cache:
                    common.otsdb_send('aws_average_cpu_utilization',
                                      my_cache['avg_cpu'], combined_tags,
                                      my_cache['ts'], False)
                my_cache['aws_info_recorded'] = True
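# common.otsdb_send is a project-specific helper that is not shown. A minimal
# stand-in that posts one data point to OpenTSDB's HTTP /api/put endpoint
# (the host and port below are placeholders) might look like:
import json
import urllib2

def otsdb_send(metric, value, tags, ts, _flush):
    # OpenTSDB's /api/put accepts a JSON object with metric, timestamp,
    # value and a tag map.
    body = json.dumps({'metric': metric, 'timestamp': ts,
                       'value': value, 'tags': tags})
    req = urllib2.Request('http://opentsdb.example.com:4242/api/put', body,
                          {'Content-Type': 'application/json'})
    urllib2.urlopen(req).close()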