Example #1
def create_cluster(client, environment_name, deployment_name, config):
    """
    Create a new CDH cluster with data from the configuration file

    @param client: authenticated API client
    @param environment_name: the name of the parent environment
    @param deployment_name: the name of the parent deployment
    @param config: parsed configuration file
    """
    cluster_size = config.getint("cluster", "size")
    template = ClusterTemplate(
        name=config.get("cluster", "name"),
        productVersions={"CDH": config.get("cluster", "cdh_version")},
        services=["HDFS", "YARN", "SPARK_ON_YARN"],
        virtualInstanceGroups={
            "masters": VirtualInstanceGroup(
                name="masters",
                minCount=1,
                serviceTypeToRoleTypes={
                    "HDFS": ["NAMENODE", "SECONDARYNAMENODE"],
                    "YARN": ["RESOURCEMANAGER", "JOBHISTORY"],
                    "SPARK_ON_YARN": ["SPARK_YARN_HISTORY_SERVER"],
                },
                virtualInstances=[create_virtual_instance_with_random_id(config, "master")],
            ),
            "gateways": VirtualInstanceGroup(
                name="gateways",
                minCount=1,
                serviceTypeToRoleTypes={"SPARK_ON_YARN": ["GATEWAY"], "HDFS": ["GATEWAY"], "YARN": ["GATEWAY"]},
                virtualInstances=[create_virtual_instance_with_random_id(config, "gateway")],
            ),
            "workers": VirtualInstanceGroup(
                name="workers",
                minCount=cluster_size,
                serviceTypeToRoleTypes={"HDFS": ["DATANODE"], "YARN": ["NODEMANAGER"], "SPARK_ON_YARN": ["GATEWAY"]},
                roleTypesConfigs={
                    "HDFS": {
                        "DATANODE": {"dfs_datanode_handler_count": "10"},
                        "NODEMANAGER": {"nodemanager_webserver_port": "8047"},
                    }
                },
                virtualInstances=[
                    create_virtual_instance_with_random_id(config, "worker") for _ in range(0, cluster_size)
                ],
            ),
        },
    )

    api = ClustersApi(client)
    try:
        api.create(environment_name, deployment_name, template)

    except HTTPError as e:
        if e.code == 302:
            print "Warning: a cluster with the same name already exists"
        else:
            raise e

    print "Clusters: %s" % api.list(environment_name, deployment_name)
    return template.name
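The helper create_virtual_instance_with_random_id is called by several of these examples but never shown. A minimal sketch, assuming it pairs a random UUID with an InstanceTemplate built from a per-group section of the same configuration file; the InstanceTemplate fields and the "image"/"type" option names are assumptions, not taken from the examples above.

import uuid

def create_virtual_instance_with_random_id(config, template_name):
    # Hypothetical sketch: build a VirtualInstance with a random id from the
    # named section of the config; field and option names are assumptions.
    return VirtualInstance(
        id=str(uuid.uuid4()),
        template=InstanceTemplate(
            name=template_name,
            image=config.get(template_name, "image"),
            type=config.get(template_name, "type"),
        ),
    )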
Example #2
def wait_for_cluster(client, environment_name, deployment_name, cluster_name):
    """
    Wait for the cluster bootstrap process to complete

    @param client: authenticated API client
    """
    api = ClustersApi(client)
    stage = None
    while stage not in ["READY", "BOOTSTRAP_FAILED"]:
        sys.stdout.write(".")
        sys.stdout.flush()

        time.sleep(0.5)
        stage = api.getStatus(environment_name, deployment_name, cluster_name).stage

    print "\nCluster '%s' current stage is '%s'" % (cluster_name, stage)
Example #4
def main(arguments):

    # Get all command line arguments

    cloudera_director_server = arguments[0]
    admin_username = arguments[1]
    credentials_file_path = arguments[2]
    with open(credentials_file_path, 'r') as credentials_file:
        admin_password = credentials_file.read().strip()
    num_lookback_dates = arguments[3]

    # Optional arguments for transient clusters
    cluster_name = ''
    if len(arguments) > 4:
        cluster_name = arguments[4]

    # Set up a Cloudera Director client
    client = ApiClient(cloudera_director_server)
    AuthenticationApi(client).login(
        Login(username=admin_username, password=admin_password))

    # Get all Environments
    environments = EnvironmentsApi(client).list()
    if not environments:
        sys.exit(1)

    # Get the start and end times of the query window; pytz timezones must be
    # attached with localize() rather than replace(tzinfo=...), which picks up
    # the wrong (LMT) offset for zones like US/Eastern
    local_tz = timezone('US/Eastern')
    from_time = local_tz.localize(datetime.now() - timedelta(hours=8))
    to_time = local_tz.localize(datetime.now())

    # Iterate through all environments to get all deployments
    for environment in environments:
        deployments = DeploymentsApi(client).list(environment)
        if not deployments:
            continue

        # Iterate through all deployments to get all clusters
        for deployment in deployments:
            clusters = ClustersApi(client).list(environment, deployment)
            if not clusters:
                continue

            # Iterate through all clusters to run queries
            for cluster in clusters:
                # Only process this cluster if a cluster name was passed as an argument
                if cluster_name and cluster_name != cluster:
                    continue

                print(
                    "Get the usage of cluster [%s] in deployment [%s] in environment [%s] from [%s] to [%s] "
                    % (cluster, deployment, environment, from_time, to_time))
                runQuery(client, environment, deployment, cluster, from_time,
                         to_time)
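The entry point for this reporting script is not shown. A minimal sketch, assuming the positional arguments are passed straight from the command line in the order main() indexes them (Director server, admin username, credentials file path, look-back window, optional cluster name):

# Hypothetical entry point; the script name in the comment is illustrative.
if __name__ == '__main__':
    # e.g. python report_usage.py director-host admin /path/to/creds 7 my-cluster
    main(sys.argv[1:])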
Example #5
def create_cluster(client, environment_name, deployment_name, config):
    """
    Create a new CDH cluster with data from the configuration file

    @param client: authenticated API client
    @param environment_name: the name of the parent environment
    @param deployment_name: the name of the parent deployment
    @param config: parsed configuration file
    """
    cluster_size = config.getint("cluster", "size")
    template = ClusterTemplate(
        name=config.get('cluster', 'name'),
        productVersions={'CDH': config.get('cluster', 'cdh_version')},
        services=['HDFS', 'YARN'],
        virtualInstanceGroups={
            'masters':
            VirtualInstanceGroup(name='masters',
                                 minCount=1,
                                 serviceTypeToRoleTypes={
                                     'HDFS': ['NAMENODE', 'SECONDARYNAMENODE'],
                                     'YARN': ['RESOURCEMANAGER', 'JOBHISTORY']
                                 },
                                 virtualInstances=[
                                     create_virtual_instance_with_random_id(
                                         config, 'master'),
                                 ]),
            'workers':
            VirtualInstanceGroup(name='workers',
                                 minCount=cluster_size,
                                 serviceTypeToRoleTypes={
                                     'HDFS': [
                                         'DATANODE',
                                     ],
                                     'YARN': ['NODEMANAGER']
                                 },
                                 roleTypesConfigs={
                                     'HDFS': {
                                         'DATANODE': {
                                             'dfs_datanode_handler_count': '10'
                                         },
                                         'NODEMANAGER': {
                                             'nodemanager_webserver_port':
                                             '8047'
                                         }
                                     }
                                 },
                                 virtualInstances=[
                                     create_virtual_instance_with_random_id(
                                         config, 'worker')
                                     for _ in range(0, cluster_size)
                                 ])
        })

    api = ClustersApi(client)
    try:
        api.create(environment_name, deployment_name, template)

    except HTTPError as e:
        if e.code == 302:
            print 'Warning: a cluster with the same name already exists'
        else:
            raise e

    print "Clusters: %s" % api.list(environment_name, deployment_name)
    return template.name
Example #6
def main():
    parser = argparse.ArgumentParser(prog='ephemeral-spark-submit.py')
    parser.add_argument(
        '--admin-username',
        default="admin",
        help=
        'Name of a user with administrative access (defaults to %(default)s)')
    parser.add_argument(
        '--admin-password',
        default="admin",
        help='Password for the administrative user (defaults to %(default)s)')
    parser.add_argument(
        '--server',
        default="http://localhost:7189",
        help="Cloudera Director server URL (defaults to %(default)s)")
    parser.add_argument(
        '--cm',
        help="The name of the Cloudera Manager server to use in Director")
    parser.add_argument('--environment',
                        help="The name of the Environment to use in Director")
    parser.add_argument(
        '--jar', help="JAR for Spark job you want to run on ephemeral cluster")
    parser.add_argument('--jarclass', help="The --class flag for spark-submit")
    parser.add_argument('--args', help="The arguments for the jar")
    parser.add_argument('--script', help="Script to run on the gateway after the Spark job")
    parser.add_argument('config_file',
                        help="Cluster configuration file (.ini)")
    args = parser.parse_args()

    if not isfile(args.config_file):
        print 'Error: "%s" not found or not a file' % args.config_file
        return -1

    config = ConfigParser.SafeConfigParser()
    config.read(args.config_file)

    # Create authenticated client
    client = cluster.get_authenticated_client(args)

    # Execute cluster creation
    cluster_name = cluster.create_cluster(client, args.environment, args.cm,
                                          config)
    print 'Waiting for the cluster to be ready. Check the web interface for details.'
    cluster.wait_for_cluster(client, args.environment, args.cm, cluster_name)
    client = ApiClient(args.server)
    AuthenticationApi(client).login(
        Login(username=args.admin_username, password=args.admin_password))
    clusters = ClustersApi(client)
    eph_cluster = clusters.get(args.environment, args.cm, cluster_name)
    instances = eph_cluster.instances
    # Find the gateway node
    gateway = None
    for instance in instances:
        if str(instance.virtualInstance.template.name) == 'gateway':
            gateway = instance
            break
    if gateway is None:
        raise RuntimeError('No gateway instance found in cluster %s' % cluster_name)
    gateway = gateway.properties['publicDnsName']
    print("The Gateway URL is: " + gateway)

    # Copy the JAR to the gateway
    copy_jar(args.jar, gateway, config)
    # Copy the script to the gateway
    copy_script(args.script, gateway, config)
    # Create a directory in HDFS with the correct permissions
    configure_hdfs(gateway, config)
    # Execute the job
    execute_spark(args.jar, args.jarclass, args.args, gateway, config)
    # Run the post-job script
    execute_script(args.script, gateway, config)
    # Destroy the cluster
    print "Job complete, terminating the cluster"
    clusters.delete(args.environment, args.cm, cluster_name)

    return 0
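The helpers copy_jar, copy_script, configure_hdfs, execute_spark, and execute_script are not shown in this listing. A minimal sketch of copy_jar, assuming it shells out to scp with a user and private key read from a hypothetical [gateway] section of the same configuration file:

import subprocess

def copy_jar(jar_path, gateway_host, config):
    # Hypothetical sketch: copy the Spark job JAR to the gateway node over scp.
    # The [gateway] section and its "user"/"private_key" options are assumptions.
    user = config.get('gateway', 'user')
    key = config.get('gateway', 'private_key')
    subprocess.check_call([
        'scp', '-i', key, '-o', 'StrictHostKeyChecking=no',
        jar_path, '%s@%s:' % (user, gateway_host),
    ])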
Example #7
def create_cluster(client, environment_name, deployment_name, config):
    """
    Create a new CDH cluster with data from the configuration file

    @param client: authenticated API client
    @param environment_name: the name of the parent environment
    @param deployment_name: the name of the parent deployment
    @param config: parsed configuration file
    """
    num_workers = config.getint("cluster", "num_workers")
    template = ClusterTemplate(
        name=config.get('cluster', 'name'),
        product_versions={'CDH': config.get('cluster', 'cdh_version')},
        services=['HDFS', 'YARN'],
        services_configs={},
        virtual_instance_groups={
            'masters':
            VirtualInstanceGroup(
                name='masters',
                min_count=1,
                service_type_to_role_types={
                    'HDFS': ['NAMENODE', 'SECONDARYNAMENODE'],
                    'YARN': ['RESOURCEMANAGER', 'JOBHISTORY']
                },
                role_types_configs={},
                virtual_instances=[create_virtual_instance(config, 'master')]),
            'workers':
            VirtualInstanceGroup(
                name='workers',
                min_count=num_workers,
                service_type_to_role_types={
                    'HDFS': ['DATANODE'],
                    'YARN': ['NODEMANAGER']
                },
                # optional role configurations, if desired or needed
                role_types_configs={
                    #'HDFS': {
                    #    'DATANODE': {
                    #        'dfs_datanode_handler_count': '10'
                    #    },
                    #    'NODEMANAGER': {
                    #        'nodemanager_webserver_port': '8047'
                    #    }
                    #}
                },
                virtual_instances=[
                    create_virtual_instance(config, 'worker')
                    for _ in range(0, num_workers)
                ])
        })

    api = ClustersApi(client)
    try:
        api.create(environment_name, deployment_name, template)

    except ApiException as exc:
        if exc.status == 409:
            print 'Warning: a cluster with the same name already exists'
        else:
            raise exc

    print "Clusters: %s" % api.list(environment_name, deployment_name)
    return template.name
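Note that this version of create_cluster evidently targets a different build of the Director Python client than the earlier examples: the template fields are snake_case (product_versions, virtual_instance_groups, min_count, service_type_to_role_types) rather than camelCase, and a duplicate cluster name surfaces as an ApiException with status 409 instead of an HTTPError with code 302.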
Example #8
def main():
    parser = argparse.ArgumentParser(prog="ephemeral-spark-submit.py")
    parser.add_argument(
        "--admin-username", default="admin", help="Name of an user with administrative access (defaults to %(default)s)"
    )
    parser.add_argument(
        "--admin-password", default="admin", help="Password for the administrative user (defaults to %(default)s)"
    )
    parser.add_argument(
        "--server", default="http://localhost:7189", help="Cloudera Director server URL (defaults to %(default)s)"
    )
    parser.add_argument("--cm", help="The name of the Cloudera Manager server to use in Director")
    parser.add_argument("--environment", help="The name of the Environment to use in Director")
    parser.add_argument("--jar", help="JAR for Spark job you want to run on ephemeral cluster")
    parser.add_argument("--jarclass", help="The --class flag for spark-submit")
    parser.add_argument("--args", help="The arguments for the jar")
    parser.add_argument("--script", help="Script that runs before spark job")
    parser.add_argument("config_file", help="Cluster configuration file (.ini)")
    args = parser.parse_args()

    if not isfile(args.config_file):
        print 'Error: "%s" not found or not a file' % args.config_file
        return -1

    config = ConfigParser.SafeConfigParser()
    config.read(args.config_file)

    # Create authenticated client
    client = cluster.get_authenticated_client(args)

    # Execute cluster creation
    cluster_name = cluster.create_cluster(client, args.environment, args.cm, config)
    print "Waiting for the cluster to be ready. Check the web interface for details."
    cluster.wait_for_cluster(client, args.environment, args.cm, cluster_name)
    client = ApiClient(args.server)
    AuthenticationApi(client).login(Login(username=args.admin_username, password=args.admin_password))
    clusters = ClustersApi(client)
    eph_cluster = clusters.get(args.environment, args.cm, cluster_name)
    instances = eph_cluster.instances
    # Find the gateway node
    gateway = None
    for instance in instances:
        if str(instance.virtualInstance.template.name) == "gateway":
            gateway = instance
            break
    if gateway is None:
        raise RuntimeError("No gateway instance found in cluster %s" % cluster_name)
    gateway = gateway.properties["publicDnsName"]
    print("The Gateway URL is: " + gateway)

    # Copy the JAR to the gateway
    copy_jar(args.jar, gateway, config)
    # Copy the script to the gateway
    copy_script(args.script, gateway, config)
    # Create a directory in HDFS with the correct permissions
    configure_hdfs(gateway, config)
    # Execute the job
    execute_spark(args.jar, args.jarclass, args.args, gateway, config)
    # Run the post-job script
    execute_script(args.script, gateway, config)
    # Destroy the cluster
    print "Job complete, terminating the cluster"
    clusters.delete(args.environment, args.cm, cluster_name)

    return 0
Example #9
def runQuery(client, environmentName, deploymentName, clusterName, fromTime,
             toTime):

    cluster = ClustersApi(client).get(environmentName, deploymentName,
                                      clusterName)
    if not cluster:
        return

    #print("Cloudera Manager URL [%s]" % cluster.url)
    cluster_health = cluster.health.status
    cmUrl = urlparse(cluster.url)
    cm_host = cmUrl.hostname
    api = ApiResource(cm_host, username="******", password="******")

    if cluster_health == 'NOT_AVAILABLE':
        return

    conn = psycopg2.connect(
        "host=techops-meta-enc.c8ibwewzhjlc.us-east-1.rds.amazonaws.com dbname=spotfire user=spotfirerpt password=spotfire123"
    )
    cur = conn.cursor()

    ################################Run Impala query#####################################################
    impalaQuery = "SELECT total_num_queries_rate_across_impalads WHERE entityName RLIKE  '.*CD-IMPALA.*' AND category = SERVICE"
    result = api.query_timeseries(impalaQuery, fromTime, toTime)
    ts_list = result[0]

    # Insert every point into the database
    for ts in ts_list.timeSeries:
        for point in ts.data:
            cur.execute(
                "INSERT INTO impala_usage_history (cluster_name, timestamp, average_queries) VALUES (%s, %s, %s)",
                (clusterName, point.timestamp, point.value))

    ################################ Run YARN query #####################################################
    yarnQuery = "SELECT apps_running_cumulative WHERE entityName RLIKE '.*root*' AND category = YARN_POOL"
    result = api.query_timeseries(yarnQuery, fromTime, toTime)
    ts_list = result[0]

    # Insert every point into the database
    for ts in ts_list.timeSeries:
        for point in ts.data:
            cur.execute(
                "INSERT INTO yarn_usage_history (cluster_name, timestamp, average_app) VALUES (%s, %s, %s)",
                (clusterName, point.timestamp, point.value))

    ################################Run HDFS query##################################################
    dfs_capacity_query = "SELECT dfs_capacity/(1024*1024) WHERE entityName RLIKE  '.*HDFS.*' AND category = SERVICE"
    result = api.query_timeseries(dfs_capacity_query, fromTime, toTime)
    ts_list = result[0]
    dfs_capacity = {}
    # Insert every point into the dictionary
    for ts in ts_list.timeSeries:
        for point in ts.data:
            dfs_capacity.update({point.timestamp: point.value})

    dfs_capacity_used_query = "SELECT dfs_capacity_used/(1024*1024) WHERE entityName RLIKE  '.*HDFS.*' AND category = SERVICE"
    result = api.query_timeseries(dfs_capacity_used_query, fromTime, toTime)
    ts_list = result[0]
    dfs_capacity_used = {}
    # Insert every point into the dictionary
    for ts in ts_list.timeSeries:
        for point in ts.data:
            dfs_capacity_used.update({point.timestamp: point.value})

    dfs_capacity_used_non_hdfs_query = "SELECT dfs_capacity_used_non_hdfs/(1024*1024) WHERE entityName RLIKE  '.*HDFS.*' AND category = SERVICE"
    result = api.query_timeseries(dfs_capacity_used_non_hdfs_query, fromTime,
                                  toTime)
    ts_list = result[0]
    dfs_capacity_used_non_hdfs = {}
    # Insert every point into the dictionary
    for ts in ts_list.timeSeries:
        for point in ts.data:
            dfs_capacity_used_non_hdfs.update({point.timestamp: point.value})

    # Insert every point into the database
    for point in dfs_capacity:
        cur.execute(
            "INSERT INTO hdfs_usage_history (cluster_name, timestamp, dfs_capacity,dfs_capacity_used,dfs_capacity_used_non_hdfs) VALUES (%s, %s, %s, %s, %s)",
            (clusterName, point, float(dfs_capacity[point]),
             float(dfs_capacity_used[point]),
             float(dfs_capacity_used_non_hdfs[point])))

    ################################Run CPU query##################################################
    cpuquery = "SELECT cpu_percent_across_hosts WHERE entityName = '1' AND category = CLUSTER"
    result = api.query_timeseries(cpuquery, fromTime, toTime)
    ts_list = result[0]
    # Insert every point into the database
    for ts in ts_list.timeSeries:
        for point in ts.data:
            cur.execute(
                "INSERT INTO cpu_usage_history (cluster_name, timestamp, cpu_percent_across_hosts) VALUES (%s, %s, %s)",
                (clusterName, point.timestamp, point.value))

    ################################Run Network I/O query##########################################
    tbreceived_query = "SELECT total_bytes_receive_rate_across_network_interfaces where category = CLUSTER"
    result = api.query_timeseries(tbreceived_query, fromTime, toTime)
    ts_list = result[0]
    tbreceived = {}
    # Insert every point into the dictionary
    for ts in ts_list.timeSeries:
        for point in ts.data:
            tbreceived.update({point.timestamp: point.value})

    tbtransmit_query = "SELECT total_bytes_transmit_rate_across_network_interfaces where category = CLUSTER"
    result = api.query_timeseries(tbtransmit_query, fromTime, toTime)
    ts_list = result[0]
    tbtransmit = {}
    # Insert every point into the dictionary
    for ts in ts_list.timeSeries:
        for point in ts.data:
            tbtransmit.update({point.timestamp: point.value})

    # Insert every point into the database
    for point in tbreceived:
        #print 	tbreceived[point]
        #print float(tbreceived[point])
        cur.execute(
            "INSERT INTO network_usage_history (cluster_name, timestamp, total_bytes_receive_rate_across_network_interfaces,total_bytes_transmit_rate_across_network_interfaces) VALUES (%s, %s, %s, %s)",
            (clusterName, point, tbreceived[point], tbtransmit[point]))

    ###############################Run HDFS I/O query#################################################
    tbreadrate_query = "select total_bytes_read_rate_across_datanodes where category = SERVICE and serviceType = HDFS"
    result = api.query_timeseries(tbreadrate_query, fromTime, toTime)
    ts_list = result[0]
    tbreadrate = {}
    # Insert every point into the dictionary
    for ts in ts_list.timeSeries:
        for point in ts.data:
            tbreadrate.update({point.timestamp: point.value})

    tbwrittenrate_query = "select total_bytes_written_rate_across_datanodes where category = SERVICE and serviceType = HDFS"
    result = api.query_timeseries(tbwrittenrate_query, fromTime, toTime)
    ts_list = result[0]
    tbwrittenrate = {}
    # Insert every point into the dictionary
    for ts in ts_list.timeSeries:
        for point in ts.data:
            tbwrittenrate.update({point.timestamp: point.value})

    # Insert every point into the database
    for point in tbreadrate:
        cur.execute(
            "INSERT INTO hdfsio_usage_history (cluster_name, timestamp, total_bytes_read_rate_across_datanodes,total_bytes_written_rate_across_datanodes) VALUES (%s, %s, %s, %s)",
            (clusterName, point, tbreadrate[point], tbwrittenrate[point]))

    ###############################Run Memory query#################################################
    memoryused_query = "select physical_memory_used WHERE category = HOST"
    result = api.query_timeseries(memoryused_query, fromTime, toTime)
    ts_list = result[0]
    memoryused = {}
    # Insert every point into the dictionary
    for ts in ts_list.timeSeries:
        for point in ts.data:
            memoryused.update({point.timestamp: point.value})

    memorytotal_query = "select physical_memory_total WHERE category = HOST"
    result = api.query_timeseries(memorytotal_query, fromTime, toTime)
    ts_list = result[0]
    memorytotal = {}
    # Insert every point into the dictionary
    for ts in ts_list.timeSeries:
        for point in ts.data:
            memorytotal.update({point.timestamp: point.value})

    # Insert every point into the database
    for point in memoryused:
        cur.execute(
            "INSERT INTO memory_usage_history (cluster_name, timestamp, physical_memory_used,physical_memory_total) VALUES (%s, %s, %s, %s)",
            (clusterName, point, memoryused[point], memorytotal[point]))

    # Commit and close connections
    conn.commit()
    cur.close()
    conn.close()
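The INSERT statements above assume the *_usage_history tables already exist. A hypothetical DDL sketch for one of them, with the column names taken from the corresponding INSERT and the column types assumed:

def create_usage_tables(conn):
    # Hypothetical schema for one of the tables populated above; column types
    # are assumptions, only the column names come from the INSERT statements.
    cur = conn.cursor()
    cur.execute("""
        CREATE TABLE IF NOT EXISTS impala_usage_history (
            cluster_name     TEXT,
            timestamp        TIMESTAMP,
            average_queries  DOUBLE PRECISION
        )
    """)
    conn.commit()
    cur.close()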
Example #10
def create_cluster(client, environment_name, deployment_name, config):
    """
    Create a new CDH cluster with data from the configuration file

    @param client: authenticated API client
    @param environment_name: the name of the parent environment
    @param deployment_name: the name of the parent deployment
    @param config: parsed configuration file
    """
    cluster_size = config.getint("cluster", "size")
    template = ClusterTemplate(
        name=config.get('cluster', 'name'),
        productVersions={
            'CDH': config.get('cluster', 'cdh_version')
        },
        services=['HDFS', 'YARN'],
        virtualInstanceGroups={
            'masters': VirtualInstanceGroup(
                name='masters',
                minCount=1,
                serviceTypeToRoleTypes={
                    'HDFS': ['NAMENODE', 'SECONDARYNAMENODE'],
                    'YARN': ['RESOURCEMANAGER', 'JOBHISTORY']
                },
                virtualInstances=[create_virtual_instance_with_random_id(config, 'master'), ]
            ),
            'workers': VirtualInstanceGroup(
                name='workers',
                minCount=cluster_size,
                serviceTypeToRoleTypes={
                    'HDFS': ['DATANODE', ],
                    'YARN': ['NODEMANAGER']
                },
                roleTypesConfigs={
                    'HDFS': {
                        'DATANODE': {
                            'dfs_datanode_handler_count': '10'
                        },
                        'NODEMANAGER': {
                            'nodemanager_webserver_port': '8047'
                        }
                    }
                },
                virtualInstances=[create_virtual_instance_with_random_id(config, 'worker')
                                  for _ in range(0, cluster_size)]
            )
        }
    )

    api = ClustersApi(client)
    try:
        api.create(environment_name, deployment_name, template)

    except HTTPError as e:
        if e.code == 302:
            print 'Warning: a cluster with the same name already exists'
        else:
            raise e

    print "Clusters: %s" % api.list(environment_name, deployment_name)
    return template.name
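All of the create_cluster examples read the same handful of options from the [cluster] section of the configuration file: name, cdh_version, and either size or num_workers. A minimal sketch of such a configuration, loaded the same way the scripts do; the option values and any per-group sections used by the instance helpers are assumptions:

from StringIO import StringIO
import ConfigParser

# Hypothetical minimal configuration covering the options read above.
SAMPLE_CONFIG = """
[cluster]
name = spark-ephemeral
cdh_version = 5.12
size = 3
num_workers = 3
"""

config = ConfigParser.SafeConfigParser()
config.readfp(StringIO(SAMPLE_CONFIG))
assert config.getint('cluster', 'size') == 3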