def create_cluster(client, environment_name, deployment_name, config): """ Create a new CDH cluster with data from the configuration file @param client: authenticated API client @param environment_name: the name of the parent environment @param deployment_name: the name of the parent deployment @param config: parsed configuration file """ cluster_size = config.getint("cluster", "size") template = ClusterTemplate( name=config.get("cluster", "name"), productVersions={"CDH": config.get("cluster", "cdh_version")}, services=["HDFS", "YARN", "SPARK_ON_YARN"], virtualInstanceGroups={ "masters": VirtualInstanceGroup( name="masters", minCount=1, serviceTypeToRoleTypes={ "HDFS": ["NAMENODE", "SECONDARYNAMENODE"], "YARN": ["RESOURCEMANAGER", "JOBHISTORY"], "SPARK_ON_YARN": ["SPARK_YARN_HISTORY_SERVER"], }, virtualInstances=[create_virtual_instance_with_random_id(config, "master")], ), "gateways": VirtualInstanceGroup( name="gateways", minCount=1, serviceTypeToRoleTypes={"SPARK_ON_YARN": ["GATEWAY"], "HDFS": ["GATEWAY"], "YARN": ["GATEWAY"]}, virtualInstances=[create_virtual_instance_with_random_id(config, "gateway")], ), "workers": VirtualInstanceGroup( name="workers", minCount=cluster_size, serviceTypeToRoleTypes={"HDFS": ["DATANODE"], "YARN": ["NODEMANAGER"], "SPARK_ON_YARN": ["GATEWAY"]}, roleTypesConfigs={ "HDFS": { "DATANODE": {"dfs_datanode_handler_count": "10"}, "NODEMANAGER": {"nodemanager_webserver_port": "8047"}, } }, virtualInstances=[ create_virtual_instance_with_random_id(config, "worker") for _ in range(0, cluster_size) ], ), }, ) api = ClustersApi(client) try: api.create(environment_name, deployment_name, template) except HTTPError as e: if e.code == 302: print "Warning: a cluster with the same name already exists" else: raise e print "Clusters: %s" % api.list(environment_name, deployment_name) return template.name
def wait_for_cluster(client, environment_name, deployment_name, cluster_name): """ Wait for the cluster bootstrap process to complete @param client: authenticated API client """ api = ClustersApi(client) stage = None while stage not in ["READY", "BOOTSTRAP_FAILED"]: sys.stdout.write(".") sys.stdout.flush() time.sleep(0.5) stage = api.getStatus(environment_name, deployment_name, cluster_name).stage print "\nCluster '%s' current stage is '%s'" % (cluster_name, stage)
def wait_for_cluster(client, environment_name, deployment_name, cluster_name): """ Wait for the cluster bootstrap process to complete @param client: authenticated API client """ api = ClustersApi(client) stage = None while stage not in ['READY', 'BOOTSTRAP_FAILED']: sys.stdout.write(".") sys.stdout.flush() time.sleep(0.5) stage = api.getStatus(environment_name, deployment_name, cluster_name).stage print "\nCluster '%s' current stage is '%s'" % (cluster_name, stage)
def main(arguments):
    """
    Collect usage metrics for every cluster visible to a Cloudera Director
    server, handing each one to runQuery for the last 8 hours of data.

    @param arguments: positional command line arguments:
        [0] Director server host
        [1] admin user name
        [2] path to a file containing the admin password
        [3] number of lookback dates (read but currently unused)
        [4] (optional) restrict the run to this cluster name
    """
    # Get all command line arguments
    cloudera_director_server = arguments[0]
    admin_username = arguments[1]
    credentials_file_path = arguments[2]
    # Use a context manager so the credentials file handle is closed
    # promptly instead of leaking until GC.
    with open(credentials_file_path, 'r') as credentials_file:
        admin_password = credentials_file.read()
    # NOTE(review): parsed but never used below — confirm whether the
    # 8-hour window should be derived from this value.
    num_lookback_dates = arguments[3]

    # Optional arguments for transient clusters
    cluster_name = ''
    if len(arguments) > 4:
        cluster_name = arguments[4]

    # Setup a Cloudera Director Client
    client = ApiClient(cloudera_director_server)
    AuthenticationApi(client).login(
        Login(username=admin_username, password=admin_password))

    # Get all Environments
    environments = EnvironmentsApi(client).list()
    if not environments:
        sys.exit(1)

    # Get start and end time of the query: the last 8 hours in US/Eastern.
    # pytz zones must be attached with localize(); replace(tzinfo=...)
    # attaches the zone's raw LMT offset and skews the window.
    local_tz = timezone('US/Eastern')
    from_time = local_tz.localize(datetime.now() - timedelta(hours=8))
    to_time = local_tz.localize(datetime.now())

    # Iterate through all environments to get all deployments
    for environment in environments:
        deployments = DeploymentsApi(client).list(environment)
        if not deployments:
            continue
        # Iterate through all deployments to get all clusters
        for deployment in deployments:
            clusters = ClustersApi(client).list(environment, deployment)
            if not clusters:
                continue
            # Iterate through all clusters to run queries
            for cluster in clusters:
                # Filter only the cluster if cluster name passed as argument
                if cluster_name != '' and cluster_name != cluster:
                    continue
                print(
                    "Get the usage of cluster [%s] in deployment [%s] in environment [%s] from [%s] to [%s] "
                    % (cluster, deployment, environment, from_time, to_time))
                runQuery(client, environment, deployment, cluster, from_time,
                         to_time)
def create_cluster(client, environment_name, deployment_name, config): """ Create a new CDH cluster with data from the configuration file @param client: authenticated API client @param environment_name: the name of the parent environment @param deployment_name: the name of the parent deployment @param config: parsed configuration file """ cluster_size = config.getint("cluster", "size") template = ClusterTemplate( name=config.get('cluster', 'name'), productVersions={'CDH': config.get('cluster', 'cdh_version')}, services=['HDFS', 'YARN'], virtualInstanceGroups={ 'masters': VirtualInstanceGroup(name='masters', minCount=1, serviceTypeToRoleTypes={ 'HDFS': ['NAMENODE', 'SECONDARYNAMENODE'], 'YARN': ['RESOURCEMANAGER', 'JOBHISTORY'] }, virtualInstances=[ create_virtual_instance_with_random_id( config, 'master'), ]), 'workers': VirtualInstanceGroup(name='workers', minCount=cluster_size, serviceTypeToRoleTypes={ 'HDFS': [ 'DATANODE', ], 'YARN': ['NODEMANAGER'] }, roleTypesConfigs={ 'HDFS': { 'DATANODE': { 'dfs_datanode_handler_count': '10' }, 'NODEMANAGER': { 'nodemanager_webserver_port': '8047' } } }, virtualInstances=[ create_virtual_instance_with_random_id( config, 'worker') for _ in range(0, cluster_size) ]) }) api = ClustersApi(client) try: api.create(environment_name, deployment_name, template) except HTTPError as e: if e.code == 302: print 'Warning: a cluster with the same name already exists' else: raise e print "Clusters: %s" % api.list(environment_name, deployment_name) return template.name
def main(): parser = argparse.ArgumentParser(prog='ephemeral-spark-submit.py') parser.add_argument( '--admin-username', default="admin", help= 'Name of an user with administrative access (defaults to %(default)s)') parser.add_argument( '--admin-password', default="admin", help='Password for the administrative user (defaults to %(default)s)') parser.add_argument( '--server', default="http://localhost:7189", help="Cloudera Director server URL (defaults to %(default)s)") parser.add_argument( '--cm', help="The name of the Cloudera Manager server to use in Director") parser.add_argument('--environment', help="The name of the Environment to use in Director") parser.add_argument( '--jar', help="JAR for Spark job you want to run on ephemeral cluster") parser.add_argument('--jarclass', help="The --class flag for spark-submit") parser.add_argument('--args', help="The arguments for the jar") parser.add_argument('--script', help="Script that runs before spark job") parser.add_argument('config_file', help="Cluster configuration file (.ini)") args = parser.parse_args() if not isfile(args.config_file): print 'Error: "%s" not found or not a file' % args.config_file return -1 config = ConfigParser.SafeConfigParser() config.read(args.config_file) #Create authenticated client client = cluster.get_authenticated_client(args) #Execute cluster creation cluster_name = cluster.create_cluster(client, args.environment, args.cm, config) print 'Waiting for the cluster to be ready. Check the web interface for details.' 
cluster.wait_for_cluster(client, args.environment, args.cm, cluster_name) client = ApiClient(args.server) AuthenticationApi(client).login( Login(username=args.admin_username, password=args.admin_password)) clusters = ClustersApi(client) eph_cluster = clusters.get(args.environment, args.cm, cluster_name) instances = eph_cluster.instances #Find which is a gateway node for instance in instances: if str(instance.virtualInstance.template.name) == 'gateway': gateway = instance gateway = gateway.properties['publicDnsName'] print("The Gateway url is: " + gateway) #Copy the JAR and postscript to the GW copy_jar(args.jar, gateway, config) #Copy script to the GW copy_script(args.script, gateway, config) #Create directory in HDFS with correct permissions configure_hdfs(gateway, config) #Execute the job execute_spark(args.jar, args.jarclass, args.args, gateway, config) #Run some post script execute_script(args.script, gateway, config) #Destroy the cluster print "Job complete, terminating the instance" clusters.delete(args.environment, args.cm, cluster_name) return 0
def create_cluster(client, environment_name, deployment_name, config):
    """
    Create a new CDH cluster with data from the configuration file
    @param client: authenticated API client
    @param environment_name: the name of the parent environment
    @param deployment_name: the name of the parent deployment
    @param config: parsed configuration file
    @return: the name of the cluster template that was submitted
    """
    # Worker instance count, from the [cluster] num_workers setting
    num_workers = config.getint("cluster", "num_workers")
    template = ClusterTemplate(
        name=config.get('cluster', 'name'),
        product_versions={'CDH': config.get('cluster', 'cdh_version')},
        services=['HDFS', 'YARN'],
        services_configs={},  # no service-level configuration overrides
        virtual_instance_groups={
            # One master instance hosting the HDFS and YARN control roles
            'masters': VirtualInstanceGroup(
                name='masters',
                min_count=1,
                service_type_to_role_types={
                    'HDFS': ['NAMENODE', 'SECONDARYNAMENODE'],
                    'YARN': ['RESOURCEMANAGER', 'JOBHISTORY']
                },
                role_types_configs={},
                virtual_instances=[create_virtual_instance(config, 'master')]),
            # num_workers instances, each running DATANODE + NODEMANAGER
            'workers': VirtualInstanceGroup(
                name='workers',
                min_count=num_workers,
                service_type_to_role_types={
                    'HDFS': ['DATANODE'],
                    'YARN': ['NODEMANAGER']
                },
                # optional role configurations, if desired or needed
                role_types_configs={
                    #'HDFS': {
                    #    'DATANODE': {
                    #        'dfs_datanode_handler_count': '10'
                    #    },
                    #    'NODEMANAGER': {
                    #        'nodemanager_webserver_port': '8047'
                    #    }
                    #}
                },
                virtual_instances=[
                    create_virtual_instance(config, 'worker')
                    for _ in range(0, num_workers)
                ])
        })
    api = ClustersApi(client)
    try:
        api.create(environment_name, deployment_name, template)
    except ApiException as exc:
        # HTTP 409 Conflict: a cluster with this name already exists —
        # warn instead of failing the whole run
        if exc.status == 409:
            print 'Warning: a cluster with the same name already exists'
        else:
            raise exc
    print "Clusters: %s" % api.list(environment_name, deployment_name)
    return template.name
def main(): parser = argparse.ArgumentParser(prog="ephemeral-spark-submit.py") parser.add_argument( "--admin-username", default="admin", help="Name of an user with administrative access (defaults to %(default)s)" ) parser.add_argument( "--admin-password", default="admin", help="Password for the administrative user (defaults to %(default)s)" ) parser.add_argument( "--server", default="http://localhost:7189", help="Cloudera Director server URL (defaults to %(default)s)" ) parser.add_argument("--cm", help="The name of the Cloudera Manager server to use in Director") parser.add_argument("--environment", help="The name of the Environment to use in Director") parser.add_argument("--jar", help="JAR for Spark job you want to run on ephemeral cluster") parser.add_argument("--jarclass", help="The --class flag for spark-submit") parser.add_argument("--args", help="The arguments for the jar") parser.add_argument("--script", help="Script that runs before spark job") parser.add_argument("config_file", help="Cluster configuration file (.ini)") args = parser.parse_args() if not isfile(args.config_file): print 'Error: "%s" not found or not a file' % args.config_file return -1 config = ConfigParser.SafeConfigParser() config.read(args.config_file) # Create authenticated client client = cluster.get_authenticated_client(args) # Execute cluster creation cluster_name = cluster.create_cluster(client, args.environment, args.cm, config) print "Waiting for the cluster to be ready. Check the web interface for details." 
cluster.wait_for_cluster(client, args.environment, args.cm, cluster_name) client = ApiClient(args.server) AuthenticationApi(client).login(Login(username=args.admin_username, password=args.admin_password)) clusters = ClustersApi(client) eph_cluster = clusters.get(args.environment, args.cm, cluster_name) instances = eph_cluster.instances # Find which is a gateway node for instance in instances: if str(instance.virtualInstance.template.name) == "gateway": gateway = instance gateway = gateway.properties["publicDnsName"] print ("The Gateway url is: " + gateway) # Copy the JAR and postscript to the GW copy_jar(args.jar, gateway, config) # Copy script to the GW copy_script(args.script, gateway, config) # Create directory in HDFS with correct permissions configure_hdfs(gateway, config) # Execute the job execute_spark(args.jar, args.jarclass, args.args, gateway, config) # Run some post script execute_script(args.script, gateway, config) # Destroy the cluster print "Job complete, terminating the instance" clusters.delete(args.environment, args.cm, cluster_name) return 0
def _collect_points(api, query, from_time, to_time):
    """Run a CM timeseries query and flatten the result into {timestamp: value}."""
    points = {}
    result = api.query_timeseries(query, from_time, to_time)
    for ts in result[0].timeSeries:
        for point in ts.data:
            points[point.timestamp] = point.value
    return points


def _insert_points(cur, api, query, insert_sql, cluster_name, from_time, to_time):
    """Run a CM timeseries query and insert one row per returned data point."""
    result = api.query_timeseries(query, from_time, to_time)
    for ts in result[0].timeSeries:
        for point in ts.data:
            cur.execute(insert_sql, (cluster_name, point.timestamp, point.value))


def runQuery(client, environmentName, deploymentName, clusterName, fromTime,
             toTime):
    """
    Pull usage metrics (Impala, YARN, HDFS capacity, CPU, network I/O,
    HDFS I/O, memory) for one cluster from Cloudera Manager and store
    them in the reporting database.

    @param client: authenticated Director API client
    @param environmentName: the name of the parent environment
    @param deploymentName: the name of the parent deployment
    @param clusterName: the cluster to query
    @param fromTime: start of the query window
    @param toTime: end of the query window
    """
    cluster = ClustersApi(client).get(environmentName, deploymentName,
                                      clusterName)
    if not cluster:
        return
    cluster_health = cluster.health.status
    cm_host = urlparse(cluster.url).hostname
    api = ApiResource(cm_host, username="******", password="******")
    if cluster_health == 'NOT_AVAILABLE':
        return

    # SECURITY(review): database credentials are hard-coded in source;
    # move them into configuration or environment variables.
    conn = psycopg2.connect(
        "host=techops-meta-enc.c8ibwewzhjlc.us-east-1.rds.amazonaws.com dbname=spotfire user=spotfirerpt password=spotfire123"
    )
    cur = conn.cursor()

    # Impala: aggregate query rate across impalads
    _insert_points(
        cur, api,
        "SELECT total_num_queries_rate_across_impalads WHERE entityName RLIKE '.*CD-IMPALA.*' AND category = SERVICE",
        "INSERT INTO impala_usage_history (cluster_name, timestamp, average_queries) VALUES (%s, %s, %s)",
        clusterName, fromTime, toTime)

    # YARN: cumulative running applications in the root pool
    _insert_points(
        cur, api,
        "SELECT apps_running_cumulative WHERE entityName RLIKE '.*root*' AND category = YARN_POOL",
        "INSERT INTO yarn_usage_history (cluster_name, timestamp, average_app) VALUES (%s, %s, %s)",
        clusterName, fromTime, toTime)

    # HDFS capacity: three metrics joined on timestamp before inserting
    dfs_capacity = _collect_points(
        api,
        "SELECT dfs_capacity/(1024*1024) WHERE entityName RLIKE '.*HDFS.*' AND category = SERVICE",
        fromTime, toTime)
    dfs_capacity_used = _collect_points(
        api,
        "SELECT dfs_capacity_used/(1024*1024) WHERE entityName RLIKE '.*HDFS.*' AND category = SERVICE",
        fromTime, toTime)
    dfs_capacity_used_non_hdfs = _collect_points(
        api,
        "SELECT dfs_capacity_used_non_hdfs/(1024*1024) WHERE entityName RLIKE '.*HDFS.*' AND category = SERVICE",
        fromTime, toTime)
    for point in dfs_capacity:
        cur.execute(
            "INSERT INTO hdfs_usage_history (cluster_name, timestamp, dfs_capacity,dfs_capacity_used,dfs_capacity_used_non_hdfs) VALUES (%s, %s, %s, %s, %s)",
            (clusterName, point, float(dfs_capacity[point]),
             float(dfs_capacity_used[point]),
             float(dfs_capacity_used_non_hdfs[point])))

    # CPU: cluster-wide CPU percentage
    _insert_points(
        cur, api,
        "SELECT cpu_percent_across_hosts WHERE entityName = '1' AND category = CLUSTER",
        "INSERT INTO cpu_usage_history (cluster_name, timestamp, cpu_percent_across_hosts) VALUES (%s, %s, %s)",
        clusterName, fromTime, toTime)

    # Network I/O: receive/transmit rates joined on timestamp
    tbreceived = _collect_points(
        api,
        "SELECT total_bytes_receive_rate_across_network_interfaces where category = CLUSTER",
        fromTime, toTime)
    tbtransmit = _collect_points(
        api,
        "SELECT total_bytes_transmit_rate_across_network_interfaces where category = CLUSTER",
        fromTime, toTime)
    for point in tbreceived:
        cur.execute(
            "INSERT INTO network_usage_history (cluster_name, timestamp, total_bytes_receive_rate_across_network_interfaces,total_bytes_transmit_rate_across_network_interfaces) VALUES (%s, %s, %s, %s)",
            (clusterName, point, tbreceived[point], tbtransmit[point]))

    # HDFS I/O: datanode read/write rates joined on timestamp
    tbreadrate = _collect_points(
        api,
        "select total_bytes_read_rate_across_datanodes where category = SERVICE and serviceType = HDFS",
        fromTime, toTime)
    tbwrittenrate = _collect_points(
        api,
        "select total_bytes_written_rate_across_datanodes where category = SERVICE and serviceType = HDFS",
        fromTime, toTime)
    for point in tbreadrate:
        cur.execute(
            "INSERT INTO hdfsio_usage_history (cluster_name, timestamp, total_bytes_read_rate_across_datanodes,total_bytes_written_rate_across_datanodes) VALUES (%s, %s, %s, %s)",
            (clusterName, point, tbreadrate[point], tbwrittenrate[point]))

    # Memory: per-host used/total joined on timestamp
    memoryused = _collect_points(
        api, "select physical_memory_used WHERE category = HOST", fromTime,
        toTime)
    memorytotal = _collect_points(
        api, "select physical_memory_total WHERE category = HOST", fromTime,
        toTime)
    for point in memoryused:
        cur.execute(
            "INSERT INTO memory_usage_history (cluster_name, timestamp, physical_memory_used,physical_memory_total) VALUES (%s, %s, %s, %s)",
            (clusterName, point, memoryused[point], memorytotal[point]))

    # Commit and close connections
    conn.commit()
    cur.close()
    conn.close()
def create_cluster(client, environment_name, deployment_name, config):
    """
    Create a new CDH cluster with data from the configuration file
    @param client: authenticated API client
    @param environment_name: the name of the parent environment
    @param deployment_name: the name of the parent deployment
    @param config: parsed configuration file
    @return: the name of the cluster template that was submitted
    """
    # Number of worker instances, from the [cluster] size setting
    cluster_size = config.getint("cluster", "size")
    template = ClusterTemplate(
        name=config.get('cluster', 'name'),
        productVersions={
            'CDH': config.get('cluster', 'cdh_version')
        },
        services=['HDFS', 'YARN'],
        virtualInstanceGroups={
            # One master instance hosting the HDFS and YARN control roles
            'masters': VirtualInstanceGroup(
                name='masters',
                minCount=1,
                serviceTypeToRoleTypes={
                    'HDFS': ['NAMENODE', 'SECONDARYNAMENODE'],
                    'YARN': ['RESOURCEMANAGER', 'JOBHISTORY']
                },
                virtualInstances=[create_virtual_instance_with_random_id(config, 'master'), ]
            ),
            # cluster_size instances, each running DATANODE + NODEMANAGER
            'workers': VirtualInstanceGroup(
                name='workers',
                minCount=cluster_size,
                serviceTypeToRoleTypes={
                    'HDFS': ['DATANODE', ],
                    'YARN': ['NODEMANAGER']
                },
                # Explicit role-level configuration overrides
                roleTypesConfigs={
                    'HDFS': {
                        'DATANODE': {
                            'dfs_datanode_handler_count': '10'
                        },
                        'NODEMANAGER': {
                            'nodemanager_webserver_port': '8047'
                        }
                    }
                },
                virtualInstances=[create_virtual_instance_with_random_id(config, 'worker')
                                  for _ in range(0, cluster_size)]
            )
        }
    )
    api = ClustersApi(client)
    try:
        api.create(environment_name, deployment_name, template)
    except HTTPError as e:
        # 302 from the server signals a name collision — warn, don't fail
        if e.code == 302:
            print 'Warning: a cluster with the same name already exists'
        else:
            raise e
    print "Clusters: %s" % api.list(environment_name, deployment_name)
    return template.name