Example #1
0
def set_up_cluster():
    """Provision a CDH cluster end to end through the Cloudera Manager API.

    Steps, in order: start the CM trial license, create the management
    service, install the hosts, create the cluster and add the hosts to it,
    download / distribute / activate the newest matching CDH parcel, inspect
    the hosts, create and auto-configure the services, set the Hive metastore
    and Reports Manager database settings, and run the cluster "first run"
    command.

    Takes no arguments: every setting (``cm_host``, ``host_list``,
    ``cluster_name``, ``service_types_and_names``, ...) is read from names
    defined outside this function.

    When a step fails, a message is printed and the process exits with
    status 1 so that callers can detect the failure.
    """
    # get a handle on the instance of CM that we have running
    api = ApiResource(cm_host, cm_port, cm_username, cm_password, version=7)

    # get the CM instance
    cm = ClouderaManager(api)

    # activate the CM trial license
    cm.begin_trial()

    # create the management service
    service_setup = ApiServiceSetupInfo(name=cm_service_name, type="MGMT")
    cm.create_mgmt_service(service_setup)

    # install hosts on this CM instance
    cmd = cm.host_install(host_username, host_list, password=host_password,
                          cm_repo_url=cm_repo_url)
    print("Installing hosts. This might take a while.")
    while cmd.success is None:
        sleep(5)
        cmd = cmd.fetch()

    if not cmd.success:
        print("cm_host_install failed: " + cmd.resultMessage)
        exit(1)

    print("cm_host_install succeeded")

    # first auto-assign roles and auto-configure the CM service
    cm.auto_assign_roles()
    cm.auto_configure()

    # create a cluster on that instance
    cluster = create_cluster(api, cluster_name, cdh_version)

    # add all our hosts to the cluster
    cluster.add_hosts(host_list)

    # re-fetch the cluster under the same name it was created with
    cluster = api.get_cluster(cluster_name)

    # get and list all available parcels, keeping the matching CDH ones
    parcels_list = []
    print("Available parcels:")
    for p in cluster.get_all_parcels():
        print('\t' + p.product + ' ' + p.version)
        if p.version.startswith(cdh_version_number) and p.product == "CDH":
            parcels_list.append(p)

    if not parcels_list:
        print("No " + cdh_version + " parcel found!")
        exit(1)

    # pick the highest version; max() keeps the first of equal candidates
    # NOTE(review): versions are compared as plain strings, so "5.10" sorts
    # before "5.9" -- confirm this is acceptable for the parcels in use.
    cdh_parcel = max(parcels_list, key=lambda parcel: parcel.version)

    # download the parcel
    print("Starting parcel download. This might take a while.")
    cmd = cdh_parcel.start_download()
    if not cmd.success:
        print("Parcel download failed!")
        exit(1)

    # make sure the download finishes
    while cdh_parcel.stage != 'DOWNLOADED':
        sleep(5)
        cdh_parcel = get_parcel(api, cdh_parcel.product, cdh_parcel.version,
                                cluster_name)

    print(cdh_parcel.product + ' ' + cdh_parcel.version + " downloaded")

    # distribute the parcel
    print("Starting parcel distribution. This might take a while.")
    cmd = cdh_parcel.start_distribution()
    if not cmd.success:
        print("Parcel distribution failed!")
        exit(1)

    # make sure the distribution finishes
    while cdh_parcel.stage != "DISTRIBUTED":
        sleep(5)
        cdh_parcel = get_parcel(api, cdh_parcel.product, cdh_parcel.version,
                                cluster_name)

    print(cdh_parcel.product + ' ' + cdh_parcel.version + " distributed")

    # activate the parcel
    cmd = cdh_parcel.activate()
    if not cmd.success:
        print("Parcel activation failed!")
        exit(1)

    # make sure the activation finishes (sleep between polls so we do not
    # flood the CM API with requests)
    while cdh_parcel.stage != "ACTIVATED":
        sleep(5)
        cdh_parcel = get_parcel(api, cdh_parcel.product, cdh_parcel.version,
                                cluster_name)

    print(cdh_parcel.product + ' ' + cdh_parcel.version + " activated")

    # inspect hosts and print the result
    print("Inspecting hosts. This might take a few minutes.")

    cmd = cm.inspect_hosts()
    while cmd.success is None:
        sleep(5)
        cmd = cmd.fetch()

    if not cmd.success:
        print("Host inpsection failed!")
        exit(1)

    print("Hosts successfully inspected: \n" + cmd.resultMessage)

    # create all the services we want to add; we will only create one instance
    # of each (the mapping goes service type -> service name)
    for service_type, service_name in service_types_and_names.items():
        cluster.create_service(service_name, service_type)

    # we will auto-assign roles; you can manually assign roles using the
    # /clusters/{clusterName}/services/{serviceName}/role endpoint or by using
    # ApiService.createRole()
    cluster.auto_assign_roles()
    cluster.auto_configure()

    # this will set up the Hive and the reports manager databases because we
    # can't auto-configure those two things
    hive = cluster.get_service(service_types_and_names["HIVE"])
    hive_config = {
        "hive_metastore_database_host": hive_metastore_host,
        "hive_metastore_database_name": hive_metastore_name,
        "hive_metastore_database_password": hive_metastore_password,
        "hive_metastore_database_port": hive_metastore_database_port,
        "hive_metastore_database_type": hive_metastore_database_type,
    }
    hive.update_config(hive_config)

    # start the management service
    cm_service = cm.get_service()
    cm_service.start().wait()

    # this will set the Reports Manager database password
    # first we find the correct role (the last match wins, as before)
    rm_role = None
    for r in cm.get_service().get_all_roles():
        if r.type == "REPORTSMANAGER":
            rm_role = r

    if rm_role is None:
        print("No REPORTSMANAGER role found!")
        exit(1)

    # then we get the corresponding role config group -- even though there is
    # only one instance of each CM management service, we do this just in case
    # it is not placed in the base group
    rm_role_group = rm_role.roleConfigGroupRef
    rm_rcg = get_role_config_group(api, rm_role.type,
                                   rm_role_group.roleConfigGroupName, None)

    # update the appropriate fields in the config
    rm_rcg_config = {
        "headlamp_database_host": reports_manager_host,
        "headlamp_database_name": reports_manager_name,
        "headlamp_database_user": reports_manager_username,
        "headlamp_database_password": reports_manager_password,
        "headlamp_database_type": reports_manager_database_type,
    }
    rm_rcg.update_config(rm_rcg_config)

    # restart the management service with new configs
    cm_service.restart().wait()

    # execute the first run command
    print("Excuting first run command. This might take a while.")
    cmd = cluster.first_run()

    while cmd.success is None:
        sleep(5)
        cmd = cmd.fetch()

    if not cmd.success:
        # resultMessage is a plain attribute, not a method
        print("The first run command failed: " + cmd.resultMessage)
        exit(1)

    print("First run successfully executed. Your cluster has been set up!")
def set_up_cluster():
    """Provision a CDH cluster through the Cloudera Manager API (v19).

    If CM already reports a license owned by "Trial License", the cluster is
    assumed to be set up and the process exits with status 0. Otherwise the
    trial is started and the function goes on to: create the management
    service if it does not exist, install the hosts, create the cluster and
    add the hosts to it, download / distribute / activate the newest matching
    CDH parcel, inspect the hosts, create and auto-configure the services,
    set the Reports Manager database settings, and run the cluster "first
    run" command.

    Takes no arguments: every setting (``cm_host``, ``host_list``,
    ``cluster_name``, ``service_types_and_names``, ...) is read from names
    defined outside this function.

    When a step fails, a message is printed and the process exits with
    status 1 so that callers can detect the failure.
    """
    # get a handle on the instance of CM that we have running
    api = ApiResource(cm_host, cm_port, cm_username, cm_password, version=19)

    # get the CM instance
    cm = ClouderaManager(api)

    print("*************************************")
    print(" Starting Auto Deployment of Cluster ")
    print("*************************************")

    # The license object exposes read-only 'owner', 'uuid' and 'expiration'.
    trial_already_active = False
    try:
        current_license = cm.get_license()
        print(current_license)
        trial_already_active = current_license.owner == "Trial License"
    except Exception:
        # The license could not be read (presumably none is installed yet --
        # TODO confirm); fall through and start the trial below.
        pass
    else:
        if not trial_already_active:
            print("Setting up `Trial License`.")

    if trial_already_active:
        print("Trial License is already set - will NOT continue now.")
        print("Assuming Cluster is already setup")
        exit(0)

    # Kept outside the try block so a begin_trial() failure is not retried
    # by the exception handler.
    cm.begin_trial()

    # create the management service unless one is already present
    service_setup = ApiServiceSetupInfo(name=cm_service_name, type="MGMT")

    try:
        mgmt_service_exists = bool(cm.get_service().name)
    except Exception:
        # Looking the service up failed; treat it as "not created yet".
        mgmt_service_exists = False

    if mgmt_service_exists:
        print("Service already exist.")
    else:
        cm.create_mgmt_service(service_setup)

    # install hosts on this CM instance
    cmd = cm.host_install(host_username,
                          host_list,
                          password=host_password,
                          cm_repo_url=cm_repo_url,
                          unlimited_jce=True)
    print("Installing hosts. This might take a while.")
    while cmd.success is None:
        sleep(5)
        cmd = cmd.fetch()
        print(cmd)

    if not cmd.success:
        print("cm_host_install failed: " + cmd.resultMessage)
        exit(1)

    print("cm_host_install succeeded")

    # first auto-assign roles and auto-configure the CM service
    cm.auto_assign_roles()
    cm.auto_configure()

    # create a cluster on that instance
    cluster = create_cluster(api, cluster_name, cdh_version)

    # add all our hosts to the cluster
    cluster.add_hosts(host_list)

    cluster = api.get_cluster(cluster_name)

    # get and list all available parcels, keeping the matching CDH ones
    parcels_list = []
    print("Available parcels:")
    for p in cluster.get_all_parcels():
        print('\t' + p.product + ' ' + p.version)
        if p.version.startswith(cdh_version_number) and p.product == "CDH":
            parcels_list.append(p)

    if not parcels_list:
        print("No " + cdh_version + " parcel found!")
        exit(1)

    # pick the highest version; max() keeps the first of equal candidates
    # NOTE(review): versions are compared as plain strings, so "5.10" sorts
    # before "5.9" -- confirm this is acceptable for the parcels in use.
    cdh_parcel = max(parcels_list, key=lambda parcel: parcel.version)

    # download the parcel
    print("Starting parcel download. This might take a while.")
    cmd = cdh_parcel.start_download()
    if not cmd.success:
        print("Parcel download failed!")
        exit(1)

    # make sure the download finishes
    while cdh_parcel.stage != 'DOWNLOADED':
        sleep(5)
        cdh_parcel = get_parcel(api, cdh_parcel.product, cdh_parcel.version,
                                cluster_name)

    print(cdh_parcel.product + ' ' + cdh_parcel.version + " downloaded")

    # distribute the parcel
    print("Starting parcel distribution. This might take a while.")
    cmd = cdh_parcel.start_distribution()
    if not cmd.success:
        print("Parcel distribution failed!")
        exit(1)

    # make sure the distribution finishes
    while cdh_parcel.stage != "DISTRIBUTED":
        sleep(5)
        cdh_parcel = get_parcel(api, cdh_parcel.product, cdh_parcel.version,
                                cluster_name)

    print(cdh_parcel.product + ' ' + cdh_parcel.version + " distributed")

    # activate the parcel
    cmd = cdh_parcel.activate()
    if not cmd.success:
        print("Parcel activation failed!")
        exit(1)

    # make sure the activation finishes (sleep between polls so we do not
    # flood the CM API with requests)
    while cdh_parcel.stage != "ACTIVATED":
        sleep(5)
        cdh_parcel = get_parcel(api, cdh_parcel.product, cdh_parcel.version,
                                cluster_name)

    print(cdh_parcel.product + ' ' + cdh_parcel.version + " activated")

    # inspect hosts and print the result
    print("Inspecting hosts. This might take a few minutes.")

    cmd = cm.inspect_hosts()
    while cmd.success is None:
        sleep(5)
        cmd = cmd.fetch()

    if not cmd.success:
        print("Host inpsection failed!")
        exit(1)

    print("Hosts successfully inspected: \n" + cmd.resultMessage)

    # create all the services we want to add; we will only create one instance
    # of each (the mapping goes service type -> service name)
    for service_type, service_name in service_types_and_names.items():
        cluster.create_service(service_name, service_type)

    # we will auto-assign roles; you can manually assign roles using the
    # /clusters/{clusterName}/services/{serviceName}/role endpoint or by using
    # ApiService.createRole()
    cluster.auto_assign_roles()
    cluster.auto_configure()

    # NOTE: this variant does not apply any Hive metastore database settings
    # (hive_metastore_database_host / _name / _password / _port / _type);
    # that step was disabled here.

    # start the management service
    cm_service = cm.get_service()
    cm_service.start().wait()

    # this will set the Reports Manager database password
    # first we find the correct role (the last match wins, as before)
    rm_role = None
    for r in cm.get_service().get_all_roles():
        if r.type == "REPORTSMANAGER":
            rm_role = r

    if rm_role is None:
        print("No REPORTSMANAGER role found!")
        exit(1)

    # then we get the corresponding role config group -- even though there is
    # only one instance of each CM management service, we do this just in case
    # it is not placed in the base group
    rm_role_group = rm_role.roleConfigGroupRef
    rm_rcg = get_role_config_group(api, rm_role.type,
                                   rm_role_group.roleConfigGroupName, None)

    # update the appropriate fields in the config
    rm_rcg_config = {
        "headlamp_database_host": reports_manager_host,
        "headlamp_database_name": reports_manager_name,
        "headlamp_database_user": reports_manager_username,
        "headlamp_database_password": reports_manager_password,
        "headlamp_database_type": reports_manager_database_type,
    }
    rm_rcg.update_config(rm_rcg_config)

    # restart the management service with new configs
    cm_service.restart().wait()

    # execute the first run command
    print("Excuting first run command. This might take a while.")
    cmd = cluster.first_run()

    while cmd.success is None:
        sleep(5)
        cmd = cmd.fetch()

    if not cmd.success:
        # resultMessage is a plain attribute, not a method
        print("The first run command failed: " + cmd.resultMessage)
        exit(1)

    print("First run successfully executed. Your cluster has been set up!")
Example #3
0
def set_up_cluster(cm_host, host_list):
    """Set up a CDH cluster with explicit role placement, then add Anaconda.

    Creates the CM management service, installs the hosts, creates the
    cluster if ``is_cluster_installed`` reports none, brings the CDH parcel
    to the activated stage, inspects the hosts, creates any missing services,
    assigns roles by host name, configures the Hive and Hue databases, runs
    the cluster "first run" command, and finally registers the Anaconda
    parcel repository and downloads / distributes / activates that parcel.

    Args:
        cm_host: Cloudera Manager host. It also receives every master role
            and is used as the Hive metastore and Hue database host.
        host_list: Hosts to install and add to the cluster. Hosts whose name
            contains ``'slave'`` get the worker roles; hosts whose name
            contains ``'edge'`` get gateway / edge roles.

    All other settings are read from names defined outside this function.

    Exits the process on failure with status 1 (management service creation),
    2 (host installation), 8 (host inspection) or 11 (first run).
    """
    print("Setting up CDH cluster...")

    api = ApiResource(cm_host, cm_port, cm_username, cm_password, version=7)
    cm = ClouderaManager(api)

    print("Creating mgmg service.")
    try:
        service_setup = ApiServiceSetupInfo(name=cm_service_name, type="MGMT")
        cm.create_mgmt_service(service_setup)
    except ApiException as exc:
        # A 400 response is tolerated (presumably "service already exists" --
        # TODO confirm); anything else is fatal.
        if exc.code != 400:
            # str() is required: concatenating the exception object itself
            # raises TypeError and hides the real error.
            print("create MGMT service failed: " + str(exc))
            exit(1)

    print("Installing hosts. This might take a while.")
    cmd = cm.host_install(host_username, host_list,
                          password=host_password).wait()
    if not cmd.success:
        print("cm_host_install failed: " + cmd.resultMessage)
        exit(2)

    print("Auto-assign roles and auto-configure the CM service")
    if not is_cluster_installed(api):
        cm.auto_assign_roles()
        cm.auto_configure()

    print("Creating cluster.")
    if not is_cluster_installed(api):
        cluster = create_cluster(api, cluster_name, cdh_version)
        cluster.add_hosts(host_list)
    cluster = api.get_cluster(cluster_name)

    cdh_parcel = get_cdh_parcel(cluster)

    # Each parcel step only runs from the stage that precedes it, so a
    # partially processed parcel resumes where it left off.
    print("Downloading CDH parcel. This might take a while.")
    if cdh_parcel.stage == "AVAILABLE_REMOTELY":
        cdh_parcel = wait_for_parcel(cdh_parcel.start_download(), api,
                                     cdh_parcel, cluster_name, 'DOWNLOADED')

    print("Distributing CDH parcel. This might take a while.")
    if cdh_parcel.stage == "DOWNLOADED":
        cdh_parcel = wait_for_parcel(cdh_parcel.start_distribution(), api,
                                     cdh_parcel, cluster_name, 'DISTRIBUTED')

    print("Activating CDH parcel. This might take a while.")
    if cdh_parcel.stage == "DISTRIBUTED":
        cdh_parcel = wait_for_parcel(cdh_parcel.activate(), api, cdh_parcel,
                                     cluster_name, 'ACTIVATED')

    # NOTE: a final "parcel stage must be ACTIVATED, else exit(14)" check
    # was disabled in this function and is left disabled.

    print("Inspecting hosts. This might take a few minutes.")
    cmd = cm.inspect_hosts()
    while cmd.success is None:
        # pause between polls so we do not flood the CM API with requests
        sleep(5)
        cmd = cmd.fetch()
    if not cmd.success:
        print("Host inpsection failed!")
        exit(8)
    print("Hosts successfully inspected: \n" + cmd.resultMessage)

    print("Creating specified services.")
    # the mapping goes service type -> service name
    for service_type, service_name in service_types_and_names.items():
        try:
            cluster.get_service(service_name)
        except Exception:
            # the lookup failed, so the service is treated as missing
            print("Creating service: " + service_name)
            cluster.create_service(service_name, service_type)

    slaves = [host for host in host_list if 'slave' in host]
    edges = [host for host in host_list if 'edge' in host]

    # assign master roles to the master node, worker roles to the slaves and
    # gateway roles to the edge nodes
    for service in cluster.get_all_services():
        if service.name == 'HDFS-1':
            service.create_role('NAMENODE-1', 'NAMENODE', cm_host)
            service.create_role('SECONDARYNAMENODE', 'SECONDARYNAMENODE',
                                cm_host)
            service.create_role('BALANCER-1', 'BALANCER', cm_host)
            service.create_role('HTTPFS-1', 'HTTPFS', cm_host)
            service.create_role('HDFS-GW_MASTER1', 'GATEWAY', cm_host)
            for (i, edge) in enumerate(edges):
                service.create_role('HDFS-GW_EDGE%s' % i, 'GATEWAY', edge)
            for (i, slave) in enumerate(slaves):
                service.create_role('DATANODE-%s' % i, 'DATANODE', slave)

        elif service.name == 'ZOOKEEPER-1':
            service.create_role('ZOOKEEPERSERVER-1', 'SERVER', cm_host)

        elif service.name == 'HBASE-1':
            service.create_role('MASTER-1', 'MASTER', cm_host)
            service.create_role('HBASETHRIFTSERVER-1', 'HBASETHRIFTSERVER',
                                cm_host)
            for (i, slave) in enumerate(slaves):
                service.create_role('HBASE-RS-%s' % i, 'REGIONSERVER', slave)

        elif service.name == 'HUE-1':
            service.create_role('HUE-MASTER1', 'HUE_SERVER', cm_host)
            service.create_role('HUE-LB_MASTER1', 'HUE_LOAD_BALANCER', cm_host)
            for (i, edge) in enumerate(edges):
                service.create_role('HUE-EDGE%s' % i, 'HUE_SERVER', edge)

        elif service.name == 'HIVE-1':
            service.create_role('HIVEMETASTORE-1', 'HIVEMETASTORE', cm_host)
            service.create_role('HIVESERVER-1', 'HIVESERVER2', cm_host)
            service.create_role('HIVE-GW_MASTER1', 'GATEWAY', cm_host)
            for (i, edge) in enumerate(edges):
                service.create_role('HIVE-GW_EDGE%s' % i, 'GATEWAY', edge)
            for (i, slave) in enumerate(slaves):
                service.create_role('HIVE-GW_SLAVE%s' % i, 'GATEWAY', slave)

        elif service.name == 'IMPALA-1':
            service.create_role('STATESTORE-1', 'STATESTORE', cm_host)
            service.create_role('CATALOGSERVER-1', 'CATALOGSERVER', cm_host)
            for (i, slave) in enumerate(slaves):
                service.create_role('IMPALAD-%s' % i, 'IMPALAD', slave)

        elif service.name == 'OOZIE-1':
            service.create_role('OOZIE_SERVER-1', 'OOZIE_SERVER', cm_host)

        elif service.name == 'SPARK_ON_YARN-1':
            service.create_role('SPARK_YARN_HISTORY_SERVER-1',
                                'SPARK_YARN_HISTORY_SERVER', cm_host)
            # Fixed name, matching the other *-GW_MASTER1 gateways; the role
            # name must not depend on a loop index left over from another
            # service's loop.
            service.create_role('SPARK_ON_YARN-GW_MASTER1', 'GATEWAY',
                                cm_host)
            for (i, edge) in enumerate(edges):
                service.create_role('SPARK_ON_YARN-GW_EDGE%s' % i, 'GATEWAY',
                                    edge)
            for (i, slave) in enumerate(slaves):
                service.create_role('SPARK_ON_YARN-GW_SLAVE%s' % i, 'GATEWAY',
                                    slave)

        elif service.name == 'SQOOP-1':
            service.create_role('SQOOP_SERVER-1', 'SQOOP_SERVER', cm_host)

        elif service.name == 'YARN-1':
            service.create_role('RESOURCEMANAGER-1', 'RESOURCEMANAGER',
                                cm_host)
            service.create_role('JOBHISTORY-1', 'JOBHISTORY', cm_host)
            for (i, slave) in enumerate(slaves):
                service.create_role('NODEMANAGER-%s' % i, 'NODEMANAGER', slave)

    # Roles are placed explicitly above, so cluster.auto_assign_roles() is
    # deliberately not called; only the configuration is auto-generated.
    cluster.auto_configure()

    print("Updating Hive config.")
    hive_metastore_host = cm_host  # let's assume that
    hive = cluster.get_service(service_types_and_names["HIVE"])
    hive_config = {
        "hive_metastore_database_host": hive_metastore_host,
        "hive_metastore_database_name": hive_metastore_name,
        "hive_metastore_database_user": hive_metastore_user,
        "hive_metastore_database_password": hive_metastore_password,
        "hive_metastore_database_port": hive_metastore_database_port,
        "hive_metastore_database_type": hive_metastore_database_type,
    }
    hive.update_config(hive_config)

    print("Updating Hue config.")
    hue_db_host = cm_host  # let's assume that
    hue = cluster.get_service(service_types_and_names["HUE"])
    hue_config = {
        "database_host": hue_db_host,
        "database_name": hue_db_name,
        "database_user": hue_db_user,
        "database_password": hue_db_password,
        "database_port": hue_db_port,
        "database_type": hue_db_type,
    }
    hue.update_config(hue_config)

    # Set Java version to OpenJDK
    cm.update_all_hosts_config({'java_home': '/usr/lib/jvm/java-openjdk'})

    print("Starting management service.")
    cm_service = cm.get_service()
    cm_service.start().wait()

    print("Excuting first run command. This might take a while.")
    cmd = cluster.first_run().wait()
    if not cmd.success:
        print("The first run command failed: " + cmd.resultMessage)
        exit(11)

    print("First run successfully executed. Your cluster has been set up!")

    # Append the Anaconda repository to CM's remote parcel repository list.
    config = cm.get_config(view='full')
    repolist = config['REMOTE_PARCEL_REPO_URLS']
    value = repolist.value or repolist.default
    value += ',' + anaconda_repo
    cm.update_config({'REMOTE_PARCEL_REPO_URLS': value})
    # presumably gives CM time to pick up the new repository -- TODO confirm
    sleep(10)

    cluster = api.get_cluster(cluster_name)
    parcel = cluster.get_parcel('Anaconda', anaconda_parcel_version)

    print("Downloading Anaconda parcel. This might take a while.")
    if parcel.stage == "AVAILABLE_REMOTELY":
        parcel = wait_for_parcel(parcel.start_download(), api, parcel,
                                 cluster_name, 'DOWNLOADED')

    print("Distributing Anaconda parcel. This might take a while.")
    if parcel.stage == "DOWNLOADED":
        parcel = wait_for_parcel(parcel.start_distribution(), api, parcel,
                                 cluster_name, 'DISTRIBUTED')

    print("Activating Anaconda parcel. This might take a while.")
    if parcel.stage == "DISTRIBUTED":
        parcel = wait_for_parcel(parcel.activate(), api, parcel, cluster_name,
                                 'ACTIVATED')

    print("Anaconda is now installed.")