def do_call(host, port, version, user, password, cluster_name, parcel_name, parcel_version, parcel_repo, init_pre_dir, init_post_dir):
    api = ApiResource(host, port, user, password, False, version)
    if not parcel_repo.endswith('/'):
        parcel_repo += '/'
    if re.match(REGEX_VERSION, parcel_version) is None or re.match(REGEX_VERSION, parcel_version).group() != parcel_version:
        raise Exception('Parcel [' + parcel_name + '] is qualified by invalid version [' + parcel_version + '] expected to match regular expression [' + REGEX_VERSION + ']')
    if not parcel_repo.endswith(parcel_version + '/'):
        raise Exception('Parcel [' + parcel_name + '] is qualified by invalid version [' + parcel_version + '] when compared with repository [' + parcel_repo + ']')    
    cm_config = api.get_cloudera_manager().get_config(view='full')
    repo_config = cm_config['REMOTE_PARCEL_REPO_URLS']
    repo_list = repo_config.value or repo_config.default
    if parcel_repo not in repo_list:     
        repo_list += ',' + parcel_repo
        api.get_cloudera_manager().update_config({'REMOTE_PARCEL_REPO_URLS': repo_list})
        time.sleep(POLL_SEC)  # The parcel synchronize end-point is not exposed via the API, so sleep instead
    cluster_names = []
    if cluster_name is None:
        for cluster in api.get_all_clusters():
            cluster_names.append(cluster.name)
    else:
        cluster_names.append(cluster_name)
    for cluster_name_itr in cluster_names:
        print 'Cluster [DEPLOYMENT] starting ... '
        cluster = api.get_cluster(cluster_name_itr)
        parcel = cluster.get_parcel(parcel_name, parcel_version)
        print 'Parcel [DEPLOYMENT] starting ... '
        do_parcel_op(cluster, parcel_name, parcel_version, 'DOWNLOAD', 'AVAILABLE_REMOTELY', 'DOWNLOADED', 'start_download')
        do_parcel_op(cluster, parcel_name, parcel_version, 'DISTRIBUTE', 'DOWNLOADED', 'DISTRIBUTED', 'start_distribution')
        do_parcel_op(cluster, parcel_name, parcel_version, 'ACTIVATE', 'DISTRIBUTED', 'ACTIVATED', 'activate')
        parcel = cluster.get_parcel(parcel_name, parcel_version)
        if parcel.stage != 'ACTIVATED':
            raise Exception('Parcel is currently mid-stage [' + parcel.stage + '], please wait for this to complete')
        print 'Parcel [DEPLOYMENT] finished'
        if init_pre_dir is not None and os.path.isdir(init_pre_dir):
            print 'Cluster [PRE_INIT] starting ... '
            for script in glob.glob(init_pre_dir + '/*.sh'):
                subprocess.call([script])
            print 'Cluster [PRE_INIT] finished'
        print 'Cluster [CONFIG_DEPLOYMENT] starting ... '
        cmd = cluster.deploy_client_config()
        if not cmd.wait(TIMEOUT_SEC).success:
            raise Exception('Failed to deploy client configs')
        print 'Cluster [CONFIG_DEPLOYMENT] finished'
        print 'Cluster [STOP] starting ... '
        cluster.stop().wait()
        print 'Cluster [STOP] finished'
        print 'Cluster [START] starting ... '
        cluster.start().wait()
        print 'Cluster [START] finished'
        if init_post_dir is not None and os.path.isdir(init_post_dir):
            print 'Cluster [POST_INIT] starting ... '
            for script in glob.glob(init_post_dir + '/*.sh'):
                subprocess.call([script])
            print 'Cluster [POST_INIT] finished'
        print 'Cluster [DEPLOYMENT] finished'
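The do_parcel_op helper is defined outside this snippet; a minimal sketch of what it likely does, assuming the POLL_SEC and TIMEOUT_SEC constants referenced above and the standard cm_api parcel calls (start_download, start_distribution, activate), is:

def do_parcel_op(cluster, parcel_name, parcel_version, label, stage_from, stage_to, method_name):
    # Hypothetical helper: trigger the named parcel operation and poll until the target stage is reached.
    parcel = cluster.get_parcel(parcel_name, parcel_version)
    if parcel.stage != stage_from:
        return
    print 'Parcel [' + label + '] starting ... '
    getattr(parcel, method_name)()
    waited = 0
    while waited < TIMEOUT_SEC:
        parcel = cluster.get_parcel(parcel_name, parcel_version)
        if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
        if parcel.stage == stage_to:
            print 'Parcel [' + label + '] finished'
            return
        time.sleep(POLL_SEC)
        waited += POLL_SEC
    raise Exception('Parcel [' + label + '] timed out in stage [' + parcel.stage + ']')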
Example 2
def main():
    """
    Kerberizes a cluster.

    @rtype:   number
    @returns: A number representing the status of success.
    """
    settings = retrieve_args()

    api = ApiResource(settings.host, settings.port, settings.username,
                      settings.password, settings.use_tls, 8)

    cloudera_manager = api.get_cloudera_manager()
    cluster = api.get_cluster(settings.cluster)
    mgmt_service = cloudera_manager.get_service()

    if verify_cloudera_manager_has_kerberos_principal(cloudera_manager):
        wait_for_command('Stopping the cluster', cluster.stop())
        wait_for_command('Stopping MGMT services', mgmt_service.stop())
        configure_services(cluster)
        wait_for_generate_credentials(cloudera_manager)
        wait_for_command('Deploying client configs.',
                         cluster.deploy_client_config())
        wait_for_command('Deploying cluster client configs',
                         cluster.deploy_cluster_client_config())
        wait_for_command('Starting MGMT services', mgmt_service.start())
        wait_for_command('Starting the cluster', cluster.start())
    else:
        print "Cluster does not have Kerberos admin credentials.  Exiting!"

    return 0
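The wait_for_command helper is not part of this excerpt; a plausible sketch, assuming it simply prints a label, blocks on the returned ApiCommand, and fails loudly on error, is:

def wait_for_command(description, command):
    # Hypothetical helper: block until the CM command finishes and raise if it did not succeed.
    print description
    if not command.wait().success:
        raise Exception(description + ' failed')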
Example 3
def main():
    """
    Add peer to the cluster.
    @rtype:   number
    @returns: A number representing the status of success.
    """
    settings = parse_args()
    if len(sys.argv) == 1 or len(sys.argv) > 17:
        print_usage_message()
        quit(1)

    api_target = ApiResource(settings.server, settings.port, settings.username,
                             settings.password, settings.use_tls, 14)
    cloudera_manager = api_target.get_cloudera_manager()
    try:
        cloudera_manager.create_peer(settings.peer_name,
                                     settings.source_cm_url,
                                     settings.source_user,
                                     settings.source_password)
        print "Peer Successfully Added"
    except ApiException as error:
        if 'already exists' in str(error):
            print 'Peer Already exists'
        else:
            raise error

    return 0
Example 4
def connect(cm_api, cm_username, cm_password, use_proxy=False):
    '''
    Wait for ten minutes for CM to come up
    '''

    for _ in xrange(120):
        try:
            logging.info("Checking CM availability....")
            # change name of proxy if necessary
            proxy = urllib2.ProxyHandler({'http': 'proxy'})

            api = ApiResource(cm_api, username=cm_username, password=cm_password, version=14)

            if use_proxy:
            # pylint: disable=W0212
                api._client._opener.add_handler(proxy)

            cloudera_manager = api.get_cloudera_manager()
            api.get_user(cm_username)

            return api, cloudera_manager
        except Exception:
            logging.warning("CM is not up")
            time.sleep(5)
    logging.error("CM did not come UP")
    sys.exit(-1)
Example 5
def connect(cm_api, cm_username, cm_password, use_proxy=False):
    '''
    Wait for ten minutes for CM to come up
    '''

    for _ in xrange(120):
        try:
            logging.info("Checking CM availability....")
            # change name of proxy if necessary
            proxy = urllib2.ProxyHandler({'http': 'proxy'})

            api = ApiResource(cm_api,
                              username=cm_username,
                              password=cm_password,
                              version=14)

            if use_proxy:
                # pylint: disable=W0212
                api._client._opener.add_handler(proxy)

            cloudera_manager = api.get_cloudera_manager()
            api.get_user(cm_username)

            return api, cloudera_manager
        except Exception:
            logging.warning("CM is not up")
            time.sleep(5)
    logging.error("CM did not come UP")
    sys.exit(-1)
Example 6
def init_cluster():
    # wait for all cloudera agent processes to come up
    setup_logger.info("Creating Clutser.")
    BDVLIB_ServiceWait([["services", "cloudera_scm_agent", NODE_GROUP_ID]])
    # make sure cloudera manager has received registration
    # for all new agents
    all_cloudera_hosts = get_hosts_for_service(
        ["services", "cloudera_scm_agent"])
    api = ApiResource(CM_HOST, username=ADMIN_USER, password=ADMIN_PASS)
    while True:
        current_all_hosts = map(lambda x: x.hostname, api.get_all_hosts())
        setup_logger.info("Currently registered hosts with CM " +
                          str(current_all_hosts))
        if all(x in current_all_hosts for x in all_cloudera_hosts):
            break
        setup_logger.info(
            "waiting for new nodes to register with cloudera manager")
        time.sleep(10)
    manager = api.get_cloudera_manager()
    manager.update_config(CM_CONFIG)
    cluster = api.create_cluster(CLUSTER_NAME, CDH_MAJOR_VERSION,
                                 CDH_FULL_VERSION)
    cluster.add_hosts(ALL_HOSTS)

    # turn off host swap alerting
    hosts_swap_alert_off(api)

    setup_logger.info("Setting Up SPARK2 Repo....")
    add_spark2_repo(api)
    # Set Java home
    setup_logger.info("Setting Up Java Path....")
    hosts_set_javahome(api)

    return (cluster, manager)
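hosts_set_javahome and add_spark2_repo are defined elsewhere; a minimal sketch of the Java-home helper, assuming it pushes the same java_home override used in the later examples (the JDK path is an assumption), might look like:

def hosts_set_javahome(api):
    # Hypothetical helper: point every managed host at the bundled JDK path.
    api.get_cloudera_manager().update_all_hosts_config(
        {'java_home': '/usr/java/jdk1.8.0_121-cloudera'})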
Example 7
def init_cluster():
    # wait for all cloudera agent processes to come up
    BDVLIB_ServiceWait(
        [["services", "cloudera_scm_agent", NODE_GROUP_ID, "kts"]])
    # make sure cloudera manager has received registration
    # for all new agents
    all_cloudera_hosts = get_hosts_for_service(
        ["services", "cloudera_scm_agent"])
    api = ApiResource(CM_HOST, username="******", password="******")
    while True:
        current_all_hosts = map(lambda x: x.hostname, api.get_all_hosts())
        setup_logger.info("Currently registered hosts with CM " +
                          str(current_all_hosts))
        if all(x in current_all_hosts for x in all_cloudera_hosts):
            break
        setup_logger.info(
            "waiting for new nodes to register with cloudera manager")
        time.sleep(10)
    manager = api.get_cloudera_manager()
    manager.update_config(CM_CONFIG)
    cluster = api.create_cluster(CLUSTER_NAME, CDH_MAJOR_VERSION,
                                 CDH_FULL_VERSION)
    KTS_HOSTS = ConfigMeta.getWithTokens(
        ['nodegroups', NODE_GROUP_ID, 'roles', 'kts', 'fqdns'])
    cluster.add_hosts(KTS_HOSTS)

    return (cluster, manager)
Example 8
def main():
    """
    Kerberizes a cluster.

    @rtype:   number
    @returns: A number representing the status of success.
    """
    settings = retrieve_args()

    api = ApiResource(settings.host, settings.port, settings.username,
                      settings.password, settings.use_tls, 8)

    cloudera_manager = api.get_cloudera_manager()
    cluster = api.get_cluster(settings.cluster)
    mgmt_service = cloudera_manager.get_service()

    if verify_cloudera_manager_has_kerberos_principal(cloudera_manager):
        wait_for_command('Stopping the cluster', cluster.stop())
        wait_for_command('Stopping MGMT services', mgmt_service.stop())
        configure_services(cluster)
        wait_for_generate_credentials(cloudera_manager)
        wait_for_command('Deploying client configs.', cluster.deploy_client_config())
        wait_for_command('Deploying cluster client configs', cluster.deploy_cluster_client_config())
        wait_for_command('Starting MGMT services', mgmt_service.start())
        wait_for_command('Starting the cluster', cluster.start())
    else:
        print "Cluster does not have Kerberos admin credentials.  Exiting!"

    return 0
Example 9
def main():
    # connect cm api
    api = ApiResource(CM_HOST,
                      7180,
                      username=CM_USERNAME,
                      password=CM_PASSWORD)
    manager = api.get_cloudera_manager()
    # no need to update cm config
    #manager.update_config(cm_host)
    print("[INFO] Connected to CM host on " + CM_HOST)

    # create cluster object
    try:
        cluster = api.get_cluster(name=CLUSTER_NAME)
    except:
        cluster = init_cluster(api, CLUSTER_NAME, CLUSTER_VERSION,
                               CLUSTER_NODE_COUNT)
    print("[INFO] Initialized cluster " + CLUSTER_NAME +
          " which uses CDH version " + CLUSTER_VERSION)

    #
    mgmt_servicename = "MGMT"
    amon_role_name = "ACTIVITYMONITOR"
    apub_role_name = "ALERTPUBLISHER"
    eserv_role_name = "EVENTSERVER"
    hmon_role_name = "HOSTMONITOR"
    smon_role_name = "SERVICEMONITOR"
    nav_role_name = "NAVIGATOR"
    navms_role_name = "NAVIGATORMETADATASERVER"
    rman_role_name = "REPORTMANAGER"
    deploy_management(manager, mgmt_servicename, amon_role_name,
                      apub_role_name, eserv_role_name, hmon_role_name,
                      smon_role_name, nav_role_name, navms_role_name,
                      rman_role_name)
    print("[INFO] Deployed CM management service " + mgmt_servicename +
          " to run on " + CM_HOST)

    #
    assign_roles(api, cluster)
    print("[INFO] all roles have assigned.")

    #
    # Custom role config groups cannot be automatically configured: Gateway Group 1 (error 400)
    try:
        cluster.auto_configure()
    except:
        pass
    update_custom_config(api, cluster)
    print("[INFO] all servies and roles have configured.")
    #
    cmd = cluster.first_run()
    while cmd.success is None:
        cmd = cmd.fetch()
    if not cmd.success:
        print("[ERROR] The first run command failed: " + cmd.resultMessage)
    else:
        print(
            "[INFO] First run successfully executed. Your cluster has been set up!"
        )
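The manual fetch() polling above can also be written with the blocking wait() call used in the other examples, for instance:

cmd = cluster.first_run()
if not cmd.wait().success:
    print("[ERROR] The first run command failed: " + cmd.resultMessage)
else:
    print("[INFO] First run successfully executed. Your cluster has been set up!")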
Example 10
def main(cm_host, user, password):
    api = ApiResource(cm_host, username=user, password=password)
    cm = api.get_cloudera_manager()
    cm.update_all_hosts_config(
        {"java_home": "/usr/java/jdk1.8.0_121-cloudera"})
    print("restarting CM service - this will take a minute or so")
    cm.get_service().restart().wait()
    print("restarting cluster - this will take 2-5 minutes")
    api.get_all_clusters()[0].restart(restart_only_stale_services=True,
                                      redeploy_client_configuration=True).wait()
Example 11
def start(host, user, passw):
    cm_host = str(host)
    api = ApiResource(cm_host,
                      7180,
                      username=str(user),
                      password=str(passw),
                      version=9)
    mgmt = api.get_cloudera_manager().get_service()
    mgmt.restart()
    print("Services successfully started")
Example 12
def main():
  parser = cm_args_parser()
  args = parser.parse_args()
  print "connecting to host:" + args.cm_host + "..."
  api = ApiResource(args.cm_host, username=args.cm_user, password=args.cm_password)
  print "host connected, getting cloudera manager "
  MANAGER = api.get_cloudera_manager()
  print "have cloudera manager object" 
  deploy_management(MANAGER, MGMT_SERVICENAME, MGMT_SERVICE_CONFIG, MGMT_ROLE_CONFIG, AMON_ROLENAME, AMON_ROLE_CONFIG, APUB_ROLENAME, APUB_ROLE_CONFIG, ESERV_ROLENAME, ESERV_ROLE_CONFIG, HMON_ROLENAME, HMON_ROLE_CONFIG, SMON_ROLENAME, SMON_ROLE_CONFIG, RMAN_ROLENAME, RMAN_ROLE_CONFIG)
  print "Deployed CM management service " + MGMT_SERVICENAME + " to run on " + CM_HOST
Example 13
def connect_cm(cm_host, cm_username, cm_password):
    """
    Connects to Cloudera Manager API Resource instance to retrieve Endpoint details
    :param cm_host: Cloudera Manager host
    :param cm_username: Username for authentication
    :param cm_password: Password for authentication
    :return:
    """
    api = ApiResource(cm_host, version=6, username=cm_username, password=cm_password)
    cm_manager = api.get_cloudera_manager()
    return api, cm_manager
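A typical call site, with placeholder host and credentials, would be:

api, cm_manager = connect_cm('cm-host.example.com', 'admin', 'admin')
for cluster in api.get_all_clusters():
    print cluster.name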
Example 14
def connect_cm(cm_host, cm_username, cm_password):
    """
    Connects to Cloudera Manager API Resource instance to retrieve Endpoint details
    :param cm_host: Cloudera Manager host
    :param cm_username: Username for authentication
    :param cm_password: Password for authentication
    :return:
    """
    api = ApiResource(cm_host, version=6, username=cm_username, password=cm_password)
    cm_manager = api.get_cloudera_manager()
    return api, cm_manager
def main():
    api = ApiResource(cm_host, cm_port, cm_username, cm_password, version=api_num)
    cm = ClouderaManager(api)
    #cm.host_install(host_username, host_list, password=host_password, cm_repo_url=cm_repo_url)
    MANAGER = api.get_cloudera_manager()
    #MANAGER.update_config)
    print "Connected to CM host on " + cm_host + " and updated CM configuration"

    #CLUSTER = init_cluster(api, cluster_name , cdh_version, host_list ,host_list)

    deploy_management(MANAGER, MGMT_SERVICENAME, MGMT_SERVICE_CONFIG, MGMT_ROLE_CONFIG, AMON_ROLENAME, AMON_ROLE_CONFIG, APUB_ROLENAME, APUB_ROLE_CONFIG, ESERV_ROLENAME, ESERV_ROLE_CONFIG, HMON_ROLENAME, HMON_ROLE_CONFIG, SMON_ROLENAME, SMON_ROLE_CONFIG, NAV_ROLENAME, NAV_ROLE_CONFIG, NAVMS_ROLENAME, NAVMS_ROLE_CONFIG, RMAN_ROLENAME, RMAN_ROLE_CONFIG)

    print "Deployed CM management service " + MGMT_SERVICENAME + " to run on " + cm_host + "now service is stop!"
Example 16
def connect(cm_api, cm_username, cm_password, use_proxy=False):

    # change name of proxy if necessary
    proxy = urllib2.ProxyHandler({'http': 'proxy'})

    api = ApiResource(cm_api, username=cm_username, password=cm_password)

    if use_proxy:
        # pylint: disable=W0212
        api._client._opener.add_handler(proxy)

    cloudera_manager = api.get_cloudera_manager()
    api.get_user(cm_username)

    return api, cloudera_manager
Example 17
def main(cm_host, user, password):
    api = ApiResource(cm_host, username=user, password=password)
    cm = api.get_cloudera_manager()
    config = cm.get_all_hosts_config(view='full')
    if config['java_home'].value == "/usr/java/jdk1.8.0_121-cloudera":
        print "Java home already set - skipping"
    else:
        print "Updating jdk location"
        cm.update_all_hosts_config(
            {"java_home": "/usr/java/jdk1.8.0_121-cloudera"})
        print("restarting CM service - this will take a minute or so")
        cm.get_service().restart().wait()
        print("restarting cluster - this will take 2-5 minutes")
        api.get_all_clusters()[0].restart(restart_only_stale_services=True,
                                          redeploy_client_configuration=True).wait()
Example 18
def main():
    # connect cm api
    api = ApiResource(CM_HOST, 7180, username=CM_USERNAME, password=CM_PASSWORD)
    manager = api.get_cloudera_manager()
    # no need to update cm config
    #manager.update_config(cm_host)
    print("[INFO] Connected to CM host on " + CM_HOST)

    # create cluster object
    try:
        cluster = api.get_cluster(name=CLUSTER_NAME)
    except:
        cluster = init_cluster(api, CLUSTER_NAME, CLUSTER_VERSION, CLUSTER_NODE_COUNT)
    print("[INFO] Initialized cluster " + CLUSTER_NAME + " which uses CDH version " + CLUSTER_VERSION)

    #
    mgmt_servicename = "MGMT"
    amon_role_name = "ACTIVITYMONITOR"
    apub_role_name = "ALERTPUBLISHER"
    eserv_role_name = "EVENTSERVER"
    hmon_role_name = "HOSTMONITOR"
    smon_role_name = "SERVICEMONITOR"
    nav_role_name = "NAVIGATOR"
    navms_role_name = "NAVIGATORMETADATASERVER"
    rman_role_name = "REPORTMANAGER"
    deploy_management(manager, mgmt_servicename, amon_role_name, apub_role_name, eserv_role_name, hmon_role_name, smon_role_name, nav_role_name, navms_role_name, rman_role_name)
    print("[INFO] Deployed CM management service " + mgmt_servicename + " to run on " + CM_HOST)

    #
    assign_roles(api, cluster)
    print("[INFO] all roles have assigned.")

    #
    # Custom role config groups cannot be automatically configured: Gateway Group 1 (error 400)
    try:
        cluster.auto_configure()
    except:
        pass
    update_custom_config(api, cluster)
    print("[INFO] all servies and roles have configured.")
    #
    cmd = cluster.first_run()
    while cmd.success is None:
        cmd = cmd.fetch()
    if not cmd.success:
        print("[ERROR] The first run command failed: " + cmd.resultMessage)
    else:
        print("[INFO] First run successfully executed. Your cluster has been set up!")
Example 19
def main():
    resource = ApiResource("localhost", 7180, "cloudera", "cloudera", version=19)
    cluster = resource.get_cluster("Cloudera Quickstart")

    cm_manager = resource.get_cloudera_manager()
    cm_manager.update_config({'REMOTE_PARCEL_REPO_URLS': PARCEL_REPO})
    cm_manager.update_all_hosts_config(JDK_CONFIG)
    time.sleep(5)

    for parcel in PARCELS:
        ParcelInstaller(parcel['name'], parcel['version']).install(cluster)

    print "Restarting cluster"
    cluster.stop().wait()
    cluster.start().wait()
    print "Done restarting cluster"
Example 20
def main():

    AMON_ROLE_CONFIG = {
        'firehose_heapsize': '1173741824',
    }

    API = ApiResource("ec2-52-24-151-222.us-west-2.compute.amazonaws.com", version=5, username="******", password="******")
    MANAGER = API.get_cloudera_manager()
    mgmt = MANAGER.get_service()
    #cf = mgmt.get_config()

    for group in mgmt.get_all_role_config_groups():
        if group.roleType == "ACTIVITYMONITOR":
            group.update_config(AMON_ROLE_CONFIG)

    pass
Example 21
def main():
    parser = cm_args_parser()
    args = parser.parse_args()
    print "connecting to host:" + args.cm_host + "..."
    api = ApiResource(args.cm_host,
                      username=args.cm_user,
                      password=args.cm_password)
    print "host connected, getting cloudera manager "
    MANAGER = api.get_cloudera_manager()
    print "have cloudera manager object"
    deploy_management(MANAGER, MGMT_SERVICENAME, MGMT_SERVICE_CONFIG,
                      MGMT_ROLE_CONFIG, AMON_ROLENAME, AMON_ROLE_CONFIG,
                      APUB_ROLENAME, APUB_ROLE_CONFIG, ESERV_ROLENAME,
                      ESERV_ROLE_CONFIG, HMON_ROLENAME, HMON_ROLE_CONFIG,
                      SMON_ROLENAME, SMON_ROLE_CONFIG, RMAN_ROLENAME,
                      RMAN_ROLE_CONFIG)
    print "Deployed CM management service " + MGMT_SERVICENAME + " to run on " + CM_HOST
Example 22
def main():
    """
    Enables HDFS HA on a cluster.

    @rtype:   number
    @returns: A number representing the status of success.
    """
    settings = retrieve_args()

    api = ApiResource(settings.host,
                      settings.port,
                      settings.username,
                      settings.password,
                      version=6)

    if not validate_cluster(api, settings.cluster):
        write_to_stdout(
            "Cluster does not satisfy preconditions for enabling HDFS HA. Exiting!"
        )
        return 1

    if settings.wait_for_good_health:
        write_to_stdout("Waiting for GOOD health... ")
        if not wait_for_good_health(api, settings.cluster):
            write_to_stdout("Cluster health is not GOOD.  Exiting!\n")
            return 1
    else:
        write_to_stdout("Checking cluster health... ")
        if not check_health(api, settings.cluster):
            write_to_stdout("Cluster health is not GOOD.  Exiting!\n")

    write_to_stdout("Cluster health is GOOD!\n")

    cluster = api.get_cluster(settings.cluster)

    invoke_hdfs_enable_nn_ha(cluster, settings.nameservice)
    update_hive_for_ha_hdfs(cluster)

    # Restarting the MGMT services to make sure the HDFS file browser functions
    # as expected.
    cloudera_manager = api.get_cloudera_manager()
    mgmt_service = cloudera_manager.get_service()
    wait_for_command('Restarting MGMT services', mgmt_service.restart())

    return 0
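invoke_hdfs_enable_nn_ha and update_hive_for_ha_hdfs are defined outside this excerpt; as a rough sketch, enabling NameNode HA goes through the HDFS service's enable_nn_ha command (the same call used in the Ansible example further down). The service name, standby host, JournalNode hosts, and edits directory below are placeholders:

def invoke_hdfs_enable_nn_ha(cluster, nameservice):
    # Hypothetical sketch: 'HDFS', the standby host id, and the JournalNode layout are assumptions.
    hdfs = cluster.get_service('HDFS')
    jns = [{'jnHostId': host, 'jnEditsDir': '/dfs/jn', 'jnName': 'HDFS-JOURNALNODE-%d' % (i + 1)}
           for i, host in enumerate(['jn1.example.com', 'jn2.example.com', 'jn3.example.com'])]
    cmd = hdfs.enable_nn_ha('HDFS-NAMENODE', 'standby-nn.example.com', nameservice, jns,
                            zk_service_name='ZOOKEEPER')
    cmd.wait()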
Example 23
def main():
    """
    Enables HDFS HA on a cluster.

    @rtype:   number
    @returns: A number representing the status of success.
    """
    settings = retrieve_args()

    api = ApiResource(settings.host, settings.port, settings.username, settings.password,
                      version=6)

    if not validate_cluster(api, settings.cluster):
        write_to_stdout("Cluster does not satisfy preconditions for enabling HDFS HA. Exiting!")
        return 1

    if settings.wait_for_good_health:
        write_to_stdout("Waiting for GOOD health... ")
        if not wait_for_good_health(api, settings.cluster):
            write_to_stdout("Cluster health is not GOOD.  Exiting!\n")
            return 1
    else:
        write_to_stdout("Checking cluster health... ")
        if not check_health(api, settings.cluster):
            write_to_stdout("Cluster health is not GOOD.  Exiting!\n")

    write_to_stdout("Cluster health is GOOD!\n")

    cluster = api.get_cluster(settings.cluster)

    invoke_hdfs_enable_nn_ha(cluster, settings.nameservice)
    update_hive_for_ha_hdfs(cluster)

    # Restarting the MGMT services to make sure the HDFS file browser functions
    # as expected.
    cloudera_manager = api.get_cloudera_manager()
    mgmt_service = cloudera_manager.get_service()
    wait_for_command('Restarting MGMT services', mgmt_service.restart())

    return 0
Example 24
# FIXME: could be removed in future version?
creds = {}
f = open('/etc/cloudera-scm-server/db.mgmt.properties')
for line in f:
  if not line.startswith("#"):
    (key, value) = line.split("=")
    s = key.split('.')
    service = s[3].strip()
    setting = s[5].strip()
    value = value.strip()
    if service not in creds:
      creds[service] = {}
    creds[service][setting] = value


api = ApiResource(sys.argv[1], username="******", password="******", use_tls=False, version=4)
cm = api.get_cloudera_manager()

roles = [ApiRole(api, t.lower(), t, ApiHostRef(api, sys.argv[1])) for t in ROLE_TYPES]
try:
   service = cm.get_service()
except ApiException:
   mgmt = ApiServiceSetupInfo("management", "MGMT", roles=roles)
   service = cm.create_mgmt_service(mgmt)

rcg = service.get_all_role_config_groups()
for rc in rcg:
  if rc.roleType in ROLE_TYPES:
    config = {}
    # Reduce amount of some logs to 1 day
    if rc.roleType == "ACTIVITYMONITOR":
        config["firehose_activity_purge_duration_hours"] = "24"
Example 25
# Loading Cluster config
CLUSTER_NM = str(options.cluster_nm)
CDH_VERSION = str(options.cdh_version)
CDH_PARCEL_VERSION = CDH_VERSION.split('-')[0]
HOST_FILE = str(options.host_file)
HOST_USER = str(options.host_user)
HOST_PASS = str(options.host_pass)
LICENSE_TYPE = int(options.license_type or "1")
LOG_FILE = "/".join(os.path.realpath(__file__).split("/")[:-2]) + '/log/scale_cluster_exechistory_' + str(os.getpid()) + '.log'
OUTPUT_TEMPLATE = "/".join(os.path.realpath(__file__).split("/")[:-2]) + "/output/final_template_1.json"
CLUSTER_HOSTS = []


# Adding cloudera parcel URL
api = ApiResource(CM_HOST, CM_PORT , ADMIN_USER, ADMIN_PASS, version=CM_API_VERSION )
api.get_cloudera_manager().update_config({"REMOTE_PARCEL_REPO_URLS": BASE_URL + "/cdh5/parcels/" + CDH_PARCEL_VERSION + "/"})


def logger(logType, msg):
    '''
    Logger v1
    logType : [debug|info|warn|error|critical]
    msg : Log message
    :return:
    '''
    logging.basicConfig(filename=LOG_FILE, level=logging.DEBUG,
                        format='%(asctime)s.%(msecs)d %(levelname)s %(module)s - %(funcName)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")

    if logType == 'debug': logging.debug(msg)
    elif logType == 'info': logging.info(msg)
    elif logType == 'warn': logging.warning(msg)
    elif logType == 'error': logging.error(msg)
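Typical usage of the helper, for example right after the parcel-repository update above, would be:

logger('info', 'Registered CDH parcel repository for version ' + CDH_PARCEL_VERSION)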
Example 26
def main():
   API = ApiResource(CM_HOST, version=5, username=ADMIN_USER, password=ADMIN_PASS)
   MANAGER = API.get_cloudera_manager()
   MANAGER.update_config(CM_CONFIG)
   print "Connected to CM host on " + CM_HOST + " and updated CM configuration"

   CLUSTER = init_cluster(API, CLUSTER_NAME, CDH_VERSION, CLUSTER_HOSTS, CM_HOST)
   print "Initialized cluster " + CLUSTER_NAME + " which uses CDH version " + CDH_VERSION

   deploy_management(MANAGER, MGMT_SERVICENAME, MGMT_SERVICE_CONFIG, MGMT_ROLE_CONFIG, AMON_ROLENAME, AMON_ROLE_CONFIG, APUB_ROLENAME, APUB_ROLE_CONFIG, ESERV_ROLENAME, ESERV_ROLE_CONFIG, HMON_ROLENAME, HMON_ROLE_CONFIG, SMON_ROLENAME, SMON_ROLE_CONFIG, NAV_ROLENAME, NAV_ROLE_CONFIG, NAVMS_ROLENAME, NAVMS_ROLE_CONFIG, RMAN_ROLENAME, RMAN_ROLE_CONFIG)
   print "Deployed CM management service " + MGMT_SERVICENAME + " to run on " + CM_HOST
   
   deploy_parcels(CLUSTER, PARCELS)
   print "Downloaded and distributed parcels: "
   PRETTY_PRINT.pprint(PARCELS)

   zookeeper_service = deploy_zookeeper(CLUSTER, ZOOKEEPER_SERVICE_NAME, ZOOKEEPER_HOSTS, ZOOKEEPER_SERVICE_CONFIG, ZOOKEEPER_ROLE_CONFIG)
   print "Deployed ZooKeeper " + ZOOKEEPER_SERVICE_NAME + " to run on: "
   PRETTY_PRINT.pprint(ZOOKEEPER_HOSTS)
   
   hdfs_service = deploy_hdfs(CLUSTER, HDFS_SERVICE_NAME, HDFS_SERVICE_CONFIG, HDFS_NAMENODE_SERVICE_NAME, HDFS_NAMENODE_HOST, HDFS_NAMENODE_CONFIG, HDFS_SECONDARY_NAMENODE_HOST, HDFS_SECONDARY_NAMENODE_CONFIG, HDFS_DATANODE_HOSTS, HDFS_DATANODE_CONFIG, HDFS_GATEWAY_HOSTS, HDFS_GATEWAY_CONFIG)
   print "Deployed HDFS service " + HDFS_SERVICE_NAME + " using NameNode on " + HDFS_NAMENODE_HOST + ", SecondaryNameNode on " + HDFS_SECONDARY_NAMENODE_HOST + ", and DataNodes running on: "
   PRETTY_PRINT.pprint(HDFS_DATANODE_HOSTS)
   init_hdfs(hdfs_service, HDFS_SERVICE_NAME, CMD_TIMEOUT)
   print "Initialized HDFS service"

   # mapred and yarn are mutually exclusive; only deploy one of them
   #mapred_service = deploy_mapreduce(CLUSTER, MAPRED_SERVICE_NAME, MAPRED_SERVICE_CONFIG, MAPRED_JT_HOST, MAPRED_JT_CONFIG, MAPRED_TT_HOSTS, MAPRED_TT_CONFIG, MAPRED_GW_HOSTS, MAPRED_GW_CONFIG)
   print "Deployed MapReduce service " + MAPRED_SERVICE_NAME + " using JobTracker on " + MAPRED_JT_HOST + " and TaskTrackers running on "
   PRETTY_PRINT.pprint(MAPRED_TT_HOSTS)
   
   yarn_service = deploy_yarn(CLUSTER, YARN_SERVICE_NAME, YARN_SERVICE_CONFIG, YARN_RM_HOST, YARN_RM_CONFIG, YARN_JHS_HOST, YARN_JHS_CONFIG, YARN_NM_HOSTS, YARN_NM_CONFIG, YARN_GW_HOSTS, YARN_GW_CONFIG)
   print "Deployed YARN service " + YARN_SERVICE_NAME + " using ResourceManager on " + YARN_RM_HOST + ", JobHistoryServer on " + YARN_JHS_HOST + ", and NodeManagers on "
   PRETTY_PRINT.pprint(YARN_NM_HOSTS)
   
   spark_service = deploy_spark(CLUSTER, SPARK_SERVICE_NAME, SPARK_SERVICE_CONFIG, SPARK_MASTER_HOST, SPARK_MASTER_CONFIG, SPARK_WORKER_HOSTS, SPARK_WORKER_CONFIG, SPARK_GW_HOSTS, SPARK_GW_CONFIG)
   print "Deployed SPARK service " + SPARK_SERVICE_NAME + " using SparkMaster on " + SPARK_MASTER_HOST + " and SparkWorkers on "
   PRETTY_PRINT.pprint(SPARK_WORKER_HOSTS)
   
   deploy_hbase(CLUSTER, HBASE_SERVICE_NAME, HBASE_SERVICE_CONFIG, HBASE_HM_HOST, HBASE_HM_CONFIG, HBASE_RS_HOSTS, HBASE_RS_CONFIG, HBASE_THRIFTSERVER_SERVICE_NAME, HBASE_THRIFTSERVER_HOST, HBASE_THRIFTSERVER_CONFIG, HBASE_GW_HOSTS, HBASE_GW_CONFIG)
   print "Deployed HBase service " + HBASE_SERVICE_NAME + " using HMaster on " + HBASE_HM_HOST + " and RegionServers on "
   PRETTY_PRINT.pprint(HBASE_RS_HOSTS)
   
   hive_service = deploy_hive(CLUSTER, HIVE_SERVICE_NAME, HIVE_SERVICE_CONFIG, HIVE_HMS_HOST, HIVE_HMS_CONFIG, HIVE_HS2_HOST, HIVE_HS2_CONFIG, HIVE_WHC_HOST, HIVE_WHC_CONFIG, HIVE_GW_HOSTS, HIVE_GW_CONFIG)
   print "Depoyed Hive service " + HIVE_SERVICE_NAME + " using HiveMetastoreServer on " + HIVE_HMS_HOST + " and HiveServer2 on " + HIVE_HS2_HOST
   init_hive(hive_service)
   print "Initialized Hive service"
   
   impala_service = deploy_impala(CLUSTER, IMPALA_SERVICE_NAME, IMPALA_SERVICE_CONFIG, IMPALA_SS_HOST, IMPALA_SS_CONFIG, IMPALA_CS_HOST, IMPALA_CS_CONFIG, IMPALA_ID_HOSTS, IMPALA_ID_CONFIG)
   print "Deployed Impala service " + IMPALA_SERVICE_NAME + " using StateStore on " + IMPALA_SS_HOST + ", CatalogServer on " + IMPALA_CS_HOST + ", and ImpalaDaemons on "
   PRETTY_PRINT.pprint(IMPALA_ID_HOSTS)
   
   # Need to start the cluster now as subsequent services need the cluster to be running
   #TODO can we just start ZK, and maybe HDFS, instead of everything? It's just needed for the search service
   print "About to restart cluster"
   CLUSTER.stop().wait()
   CLUSTER.start().wait()
   print "Done restarting cluster"

   search_service = deploy_search(CLUSTER, SEARCH_SERVICE_NAME, SEARCH_SERVICE_CONFIG, SEARCH_SOLR_HOST, SEARCH_SOLR_CONFIG, SEARCH_GW_HOSTS, SEARCH_GW_CONFIG)
   print "Deployed Search service " + SEARCH_SERVICE_NAME + " using SOLRHost " + SEARCH_SOLR_HOST
   
   flume_service = deploy_flume(CLUSTER, FLUME_SERVICE_NAME, FLUME_SERVICE_CONFIG, FLUME_AGENT_HOSTS, FLUME_AGENT_CONFIG)
   print "Deployed Flume service " + FLUME_SERVICE_NAME + " using FlumeAgents on "
   PRETTY_PRINT.pprint(FLUME_AGENT_HOSTS)
   
   oozie_service = deploy_oozie(CLUSTER, OOZIE_SERVICE_NAME, OOZIE_SERVICE_CONFIG, OOZIE_SERVER_HOST, OOZIE_SERVER_CONFIG)
   print "Deployed Oozie service " + OOZIE_SERVICE_NAME + " using OozieServer on " + OOZIE_SERVER_HOST
   
   sqoop_service = deploy_sqoop(CLUSTER, SQOOP_SERVICE_NAME, SQOOP_SERVICE_CONFIG, SQOOP_SERVER_HOST, SQOOP_SERVER_CONFIG)
   print "Deployed Sqoop service " + SQOOP_SERVICE_NAME + " using SqoopServer on " + SQOOP_SERVER_HOST
   
   hue_service = deploy_hue(CLUSTER, HUE_SERVICE_NAME, HUE_SERVICE_CONFIG, HUE_SERVER_HOST, HUE_SERVER_CONFIG, HUE_KTR_HOST, HUE_KTR_CONFIG)
   print "Deployed HUE service " + HUE_SERVICE_NAME + " using HueServer on " + HUE_SERVER_HOST
   
   #deploy_accumulo(CLUSTER, ACCUMULO_SERVICE_NAME, ACCUMULO_SERVICE_CONFIG, ACCUMULO_MASTER_HOSTS, ACCUMULO_MASTER_CONFIG, ACCUMULO_TRACER_HOSTS, ACCUMULO_TRACER_CONFIG, ACCUMULO_TSERVER_HOSTS, ACCUMULO_TSERVER_CONFIG, ACCUMULO_LOGGER_HOSTS, ACCUMULO_LOGGER_CONFIG, ACCUMULO_MONITOR_HOST, ACCUMULO_MONITOR_CONFIG, ACCUMULO_GC_HOST, ACCUMULO_GC_CONFIG, ACCUMULO_GATEWAY_HOSTS, ACCUMULO_GATEWAY_CONFIG)
   
   print "About to restart cluster."
   CLUSTER.stop().wait()
   CLUSTER.start().wait()
   print "Done restarting cluster."
   
   post_startup(CLUSTER, hdfs_service, oozie_service)

   print "Finished deploying Cloudera cluster. Go to http://" + CM_HOST + ":7180 to administer the cluster."
   print "If the Oozie service (and therefore the HUE service as well, which depends on it) did not start properly, go to the Oozie service, stop it, click on the Actions button and choose 'Create Database', then start it."
   print "If there are any other services not running, restart them now."
Example 27
class ClouderaManager(object):
    """
    The complete orchestration of a cluster from start to finish assuming all the hosts are
    configured and Cloudera Manager is installed with all the required databases setup.

    Handle all the steps required in creating a cluster. All the functions are built to be
    idempotent, so you should be able to resume from any failed step by re-running
    __class__.setup().
    """

    def __init__(self, module, config, trial=False, license_txt=None):
        self.api = ApiResource(config['cm']['host'], username=config['cm']['username'],
                               password=config['cm']['password'])
        self.manager = self.api.get_cloudera_manager()
        self.config = config
        self.module = module
        self.trial = trial
        self.license_txt = license_txt
        self.cluster = None

    def enable_license(self):
        """
        Enable the requested license, either it's trial mode or a full license is entered and
        registered.
        """
        try:
            _license = self.manager.get_license()
        except ApiException:
            print_json(type="LICENSE", msg="Enabling license")
            if self.trial:
                self.manager.begin_trial()
            else:
                if self.license_txt is not None:
                    self.manager.update_license(self.license_txt)
                else:
                    fail(self.module, 'License should be provided or trial should be specified')

            try:
                _license = self.manager.get_license()
            except ApiException:
                fail(self.module, 'Failed enabling license')
        print_json(type="LICENSE",
                   msg="Owner: {}, UUID: {}".format(_license.owner, _license.uuid))

    def create_cluster(self):
        """
        Create a cluster and add hosts to the cluster. A new cluster is only created
        if another one doesn't exist with the same name.
        """
        print_json(type="CLUSTER", msg="Creating cluster")
        cluster_config = self.config['cluster']
        try:
            self.cluster = self.api.get_cluster(cluster_config['name'])
        except ApiException:
            print_json(type="CLUSTER",
                       msg="Creating Cluster entity: {}".format(cluster_config['name']))
            self.cluster = self.api.create_cluster(cluster_config['name'],
                                                   cluster_config['version'],
                                                   cluster_config['fullVersion'])

        cluster_hosts = [self.api.get_host(host.hostId).hostname
                         for host in self.cluster.list_hosts()]
        hosts = []
        for host in cluster_config['hosts']:
            if host not in cluster_hosts:
                hosts.append(host)
        self.cluster.add_hosts(hosts)

    def activate_parcels(self):
        print_json(type="PARCELS", msg="Setting up parcels")
        for parcel_cfg in self.config['parcels']:
            parcel = Parcels(self.module, self.manager, self.cluster,
                             parcel_cfg.get('version'), parcel_cfg.get('repo'),
                             parcel_cfg.get('product', 'CDH'))
            parcel.download()
            parcel.distribute()
            parcel.activate()

    @retry(attempts=20, delay=5)
    def wait_inspect_hosts(self, cmd):
        """
        Inspect all the hosts. Basically wait till the check completes on all hosts.

        :param cmd: A command instance used for tracking the status of the command
        """
        print_json(type="HOSTS", msg="Inspecting hosts")
        cmd = cmd.fetch()
        if cmd.success is None:
            raise ApiException("Waiting on command {} to finish".format(cmd))
        elif not cmd.success:
            if (cmd.resultMessage is not None and
                    'is not currently available for execution' in cmd.resultMessage):
                raise ApiException('Retry Command')
            fail(self.module, 'Host inspection failed')
        print_json(type="HOSTS", msg="Host inspection completed: {}".format(cmd.resultMessage))

    def deploy_mgmt_services(self):
        """
        Configure, deploy and start all the Cloudera Management Services.
        """
        print_json(type="MGMT", msg="Deploying Management Services")
        try:
            mgmt = self.manager.get_service()
            if mgmt.serviceState == 'STARTED':
                return
        except ApiException:
            print_json(type="MGMT", msg="Management Services don't exist. Creating.")
            mgmt = self.manager.create_mgmt_service(ApiServiceSetupInfo())

        for role in self.config['services']['MGMT']['roles']:
            if not len(mgmt.get_roles_by_type(role['group'])) > 0:
                print_json(type="MGMT", msg="Creating role for {}".format(role['group']))
                mgmt.create_role('{}-1'.format(role['group']), role['group'], role['hosts'][0])

        for role in self.config['services']['MGMT']['roles']:
            role_group = mgmt.get_role_config_group('mgmt-{}-BASE'.format(role['group']))
            role_group.update_config(role.get('config', {}))

        mgmt.start().wait()
        if self.manager.get_service().serviceState == 'STARTED':
            print_json(type="MGMT", msg="Management Services started")
        else:
            fail(self.module, "[MGMT] Cloudera Management services didn't start up properly")

    def service_orchestrate(self, services):
        """
        Create, pre-configure provided list of services
        Stop/Start those services
        Perform and post service startup actions

        :param services: List of Services to perform service specific actions
        """
        service_classes = []

        # Create and pre-configure provided services
        for service in services:
            service_config = self.config['services'].get(service.upper())
            if service_config:
                svc = getattr(sys.modules[__name__], service)(self.cluster, service_config)
                if not svc.started:
                    svc.deploy()
                    svc.pre_start()
                service_classes.append(svc)

        print_json(type="CLUSTER", msg="Starting services: {} on Cluster".format(services))

        # Deploy all the client configs, since some of the services depend on other services
        # and is essential that the client configs are in place
        self.cluster.deploy_client_config()

        # Start each service and run the post_start actions for each service
        for svc in service_classes:
            # Only go thru the steps if the service is not yet started. This helps with
            # re-running the script after fixing errors
            if not svc.started:
                svc.start()
                svc.post_start()

    def setup(self):
        # TODO(rnirmal): Cloudera Manager SSL?

        # Enable a full license or start a trial
        self.enable_license()

        # Create the cluster entity and associate hosts
        self.create_cluster()

        # Download and activate the parcels
        self.activate_parcels()

        # Inspect all the hosts
        self.wait_inspect_hosts(self.manager.inspect_hosts())

        # Create Management services
        self.deploy_mgmt_services()

        # Configure and Start base services
        self.service_orchestrate(BASE_SERVICES)

        # Configure and Start remaining services
        self.service_orchestrate(ADDITIONAL_SERVICES)
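The retry decorator applied to wait_inspect_hosts is not included above; a small sketch matching its usage (re-invoke the wrapped call on ApiException up to `attempts` times, sleeping `delay` seconds between tries, and assuming cm_api's ApiException is imported as in the class) is:

def retry(attempts, delay):
    # Hypothetical helper backing @retry(attempts=20, delay=5) above.
    def decorator(func):
        def wrapper(*args, **kwargs):
            for attempt in range(attempts):
                try:
                    return func(*args, **kwargs)
                except ApiException:
                    if attempt == attempts - 1:
                        raise
                    time.sleep(delay)
        return wrapper
    return decorator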
Example 28
def create_cluster(config_dict):
    config.read([
        './conf/hadrian.ini', './conf/cluster_specs.ini',
        './conf/cloudera-manager/cm.ini'
    ])

    cm_cluster_name = config_grabber("Globals")['cm.cluster.name']
    cm_username = config_grabber("Globals")['cm.username']
    cm_password = config_grabber("Globals")['cm.password']
    cm_port = config_grabber("Globals")['cm.port']
    version = config_grabber('Globals')['cdh.cluster.version']
    cm_server = config_grabber(cm_cluster_name + '-en')['cm.server']

    #Grab all configuration files in the directory with the CM Cluster Name.

    for i in os.listdir('./conf/' + cm_cluster_name):
        config.read('./conf/' + cm_cluster_name + '/' + i)

    all_nodes = list()

    while (get_cm_status(cm_server + ':' + cm_port) != 200):
        print 'Waiting for CM Server to start... '
        time.sleep(15)

    api = ApiResource(cm_server, cm_port, cm_username, cm_password)
    # create cluster
    cluster = api.create_cluster(cm_cluster_name, version.upper())

    #Config CM
    print 'Applying any configuration changes to Cloudera Manager'
    cmanager = api.get_cloudera_manager()
    cmanager.update_config(config_grabber('cloudera-manager-updates'))

    planned_nodes = config_grabber(cm_cluster_name +
                                   '-en')['full.list'].split(',')
    for k, v in config_grabber(cm_cluster_name + '-dn').iteritems():
        for j in v.split(','):
            planned_nodes.append(j)

    # TODO make this smarter.  show which agents haven't checked in.  Add the option to continue without them.
    if len(api.get_all_hosts()) != len(planned_nodes):
        print 'Waiting for all agents to check into the CM Server before continuing.'

        while len(planned_nodes) > len(api.get_all_hosts()):
            print 'Waiting for the final set of CM Agent nodes to check in.'
            time.sleep(5)

    print 'Updating Rack configuration for data nodes.'
    all_hosts = list()
    for host in api.get_all_hosts():
        all_hosts.append(host.hostId)
        for k, v in config_grabber(cm_cluster_name + '-dn').iteritems():
            if host.hostname in v:
                print 'Setting host: ' + host.hostname + ' to rack /default/' + k
                host.set_rack_id('/default/' + k)

    print 'Adding all hosts to cluster.'
    cluster.add_hosts(all_hosts)

    # download CDH Parcels
    # TODO add some logic here to make the parcel list something that's read from the hadrian.ini
    # This will allow support for other CDH packages, Search, etc.
    if config_grabber('Globals')['cdh.distribution.method'] == 'parcels':
        distribute_parcel(cluster, 'CDH',
                          config_grabber("Globals")['cdh.parcel.version'])

    if config_dict.get('hdfs_ha') == True:
        create_zookeeper_service(config_dict, cluster)
    create_hdfs_service(config_dict, cluster)

    cmd = cluster.deploy_client_config()
    if not cmd.wait(CMD_TIMEOUT).success:
        print 'Failed to deploy client configurations'
    else:
        print 'Client configuration deployment complete.'

    create_mapred_service(config_dict, cluster, cm_server)
    if config_dict.get('hbase') == True:
        if config_dict.get('hdfs_ha') == False:
            create_zookeeper_service(config_dict, cluster)
        create_hbase_service(config_dict, cluster)
    if config_dict.get('hive') == True:
        create_hive_service(config_dict, cluster)
    print 'Starting final client configuration deployment for all services.'
    cmd = cluster.deploy_client_config()
    if not cmd.wait(CMD_TIMEOUT).success:
        print 'Failed to deploy client configuration.'
    else:
        print 'Client configuration deployment complete.  The cluster is all yours.  Happy Hadooping.'
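get_cm_status is defined elsewhere in the script; a reasonable sketch, assuming it simply probes the CM web UI and returns the HTTP status code (or 0 when the server is unreachable), is:

import urllib2

def get_cm_status(server_and_port):
    # Hypothetical helper: return the HTTP status of the CM server, 0 if it cannot be reached.
    try:
        return urllib2.urlopen('http://' + server_and_port, timeout=10).getcode()
    except Exception:
        return 0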
Example 29
from cm_api.api_client import ApiResource

CM_HOST = "127.0.0.1"
ADMIN_USER = "******"
ADMIN_PASS = "******"

API = ApiResource(CM_HOST,
                  version=14,
                  username=ADMIN_USER,
                  password=ADMIN_PASS)
MANAGER = API.get_cloudera_manager()
mgmt = MANAGER.get_service()

print "restart mgmt..."
mgmt.restart().wait()

print "TIP cluster..."
tip = API.get_cluster("TIP")
tip.restart().wait()
#!/usr/bin/env python
#Author: Pratap Raj
#Purpose: Start Cloudera Management services

import sys
import socket
from cm_api.api_client import ApiResource
from cm_api.endpoints.cms import ClouderaManager

#########
# Do not edit any system variables here. They are all passed from the startstopcluster.sh script, so make changes there.
cmhost=str(sys.argv[1])
cmport=str(sys.argv[2])
cmusername=str(sys.argv[3])
cmpassword=str(sys.argv[4])
tlspref=str(sys.argv[5])
#########

api = ApiResource(cmhost, server_port=cmport, username=cmusername, password=cmpassword, use_tls=(tlspref.lower() == 'true'))

mgmt=api.get_cloudera_manager().get_service()
cmstartstatus=mgmt.start().wait()
print cmstartstatus.success
Example 31
def main():
  module = AnsibleModule(argument_spec=dict((argument, {'type': 'str'}) for argument in MODULE_ARGUMENTS))

  api = ApiResource('localhost', username=ADMIN_USER, password=ADMIN_PASS, version=9)
  cluster_name = CLUSTER_NAME

  manager = api.get_cloudera_manager()

  action_a = module.params.get('action', None)

  if action_a == 'create_cluster':
    license_a = module.params.get('license', None)
    version_a = module.params.get('version', None)

    cluster_list = [x.name for x in api.get_all_clusters()]
    if cluster_name in cluster_list:
      module.exit_json(changed=False, msg='Cluster exists')
    else:
      cluster = api.create_cluster(CLUSTER_NAME, fullVersion=version_a)
      if license_a == None:
        manager.begin_trial()
      else:
        manager.update_license(license_a.decode('base64'))
      module.exit_json(changed=True, msg='Cluster created')
  elif action_a in ['add_host', 'create_mgmt', 'deploy_parcel', 'deploy_hdfs_base', 'deploy_hdfs_httpfs', 'deploy_hdfs_dn', 'deploy_hdfs_ha', 'deploy_rm_ha', 'set_config', 'service', 'deploy_service', 'deploy_service_worker_nodes', 'deploy_base_roles', 'run_command', 'cluster', 'create_snapshot_policy', 'deploy_configuration']:
    # more complicated actions that need a created cluster go here
    cluster = api.get_cluster(cluster_name)
    host_map = dict((api.get_host(x.hostId).hostname, x.hostId) for x in cluster.list_hosts())

    # adds a host to the cluster
    # host_name should be in the internal DNS format, ip-xx-xx-xx.compute.internal
    if action_a == 'add_host':
      host_a = module.params.get('host', None)

      host_list = host_map.keys()
      if host_a in host_list:
        module.exit_json(changed=False, msg='Host already in cluster')
      else:
        try:
          cluster.add_hosts([host_a])
        except ApiException:
          # if a host isn't there, it could be because the agent didn't manage to connect yet
          # so let's wait a moment for it
          sleep(120)
          cluster.add_hosts([host_a])

        module.exit_json(changed=True, msg='Host added')

    # create management service and set its basic configuration
    # this needs a separate function since management is handled
    # differently than the rest of services
    elif action_a == 'create_mgmt':
      host_a = module.params.get('host', None)

      # getting the management service is the only way to check if mgmt exists
      # an exception means there isn't one
      try:
        mgmt = manager.get_service()
        module.exit_json(changed=False, msg='Mgmt service already exists')
      except ApiException:
        pass

      mgmt = manager.create_mgmt_service(ApiServiceSetupInfo())

      # this is ugly... and I see no good way to unuglify it
      firehose_passwd = Popen("sudo grep com.cloudera.cmf.ACTIVITYMONITOR.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")
      reports_passwd = Popen("sudo grep com.cloudera.cmf.REPORTSMANAGER.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")

      # since there is no easy way of configuring the manager... let's do it here :(
      role_conf = defaultdict(dict)
      role_conf['ACTIVITYMONITOR'] = {
          'firehose_database_host': '{0}:7432'.format(host_a),
          'firehose_database_user': '******',
          'firehose_database_password': firehose_passwd,
          'firehose_database_type': 'postgresql',
          'firehose_database_name': 'amon',
          'firehose_heapsize': '268435456',
      }
      role_conf['EVENTSERVER'] = {
          'event_server_heapsize': '215964392'
      }
      role_conf['REPORTSMANAGER'] = {
          'headlamp_database_host': '{0}:7432'.format(host_a),
          'headlamp_database_user': '******',
          'headlamp_database_password': reports_passwd,
          'headlamp_database_type': 'postgresql',
          'headlamp_database_name': 'rman',
          'headlamp_heapsize': '268435456',
      }

      roles = ['ACTIVITYMONITOR', 'ALERTPUBLISHER', 'EVENTSERVER', 'HOSTMONITOR', 'SERVICEMONITOR', 'REPORTSMANAGER']
      # create management roles
      for role in roles:
        mgmt.create_role('{0}-1'.format(role), role, host_map[host_a])

      # update configuration of each
      for group in mgmt.get_all_role_config_groups():
        group.update_config(role_conf[group.roleType])

      mgmt.start().wait()
      # after starting this service needs time to spin up
      sleep(30)
      module.exit_json(changed=True, msg='Mgmt created and started')

    # deploy a given parcel on all hosts in the cluster
    # you can specify a substring of the version ending with latest, for example 5.3-latest instead of 5.3.5-1.cdh5.3.5.p0.4
    elif action_a == 'deploy_parcel':
      name_a = module.params.get('name', None)
      version_a = module.params.get('version', None)

      if "latest" in version_a:
        available_versions = [x.version for x in cluster.get_all_parcels() if x.product == name_a]
        if "-latest" in version_a:
          version_substr = re.match('(.+?)-latest', version_a).group(1)
        # if version is just "latest", try to check everything
        else:
          version_substr = ".*"
        try:
          [version_parcel] = [x for x in available_versions if re.match(version_substr, x) != None]
        except ValueError:
          module.fail_json(msg='Specified version {0} doesnt appear in {1} or appears twice'.format(version_substr, available_versions))
      else:
        version_parcel = version_a

      # we now go through various stages of getting the parcel
      # as there is no built-in way of waiting for an operation to complete
      # we use loops with sleep to get it done
      parcel = cluster.get_parcel(name_a, version_parcel)
      if parcel.stage == 'AVAILABLE_REMOTELY':
        parcel.start_download()

        while parcel.stage != 'DOWNLOADED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
          sleep(10)

      if parcel.stage == 'DOWNLOADED':
        parcel.start_distribution()

        while parcel.stage != 'DISTRIBUTED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
          # sleep while hosts report problems after the download
          for i in range(12):
            sleep(10)
            if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
              break

      # since parcels are distributed automatically when a new host is added to a cluster
      # we can encounter the ,,ACTIVATING'' stage then
      if parcel.stage == 'DISTRIBUTED' or parcel.stage == 'ACTIVATING':
        if parcel.stage == 'DISTRIBUTED':
          parcel.activate()

        while parcel.stage != 'ACTIVATED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          # this sleep has to be large because although the operation is very fast
          # it makes the management and cloudera hosts go bonkers, failing all of the health checks
          sleep(10)

        # sleep while hosts report problems after the distribution
        for i in range(60):
          sleep(10)
          if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
            break

        module.exit_json(changed=True, msg='Parcel activated')

      if parcel.stage == 'ACTIVATED':
        module.exit_json(changed=False, msg='Parcel already activated')

      # if we get down here, something is not right
      module.fail_json(msg='Invalid parcel state')

    # deploy nodes for workers, according to SERVICE_WORKER_MAP
    # also give them sane names and init zookeeper and kafka ones
    # which need id's specified
    elif action_a == 'deploy_service_worker_nodes':
      host_a = module.params.get('host', None)
      service_a = module.params.get('service', None)

      service_name = SERVICE_MAP[service_a]
      role_name = SERVICE_WORKER_MAP[service_a]['name']
      full_role_name = SERVICE_WORKER_MAP[service_a]['formatstring']

      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
      else:
        service = cluster.get_service(service_name)

      nodes = [x for x in service.get_all_roles() if role_name in x.name]

      # if host already has the given group, we should skip it
      if host_map[host_a] in [x.hostRef.hostId for x in nodes]:
        module.exit_json(changed=False, msg='Host already is a {0}'.format(role_name))
      # find out the highest id that currently exists
      else:
        node_names = [x.name for x in nodes]
        if len(node_names) == 0:
          # if no nodes, start numbering from 1
          node_i = 1
        else:
          # take the max number and add 1 to it
          node_i = max([int(x.split('-')[-1]) for x in node_names]) + 1

        if service_name == 'ZOOKEEPER':
          role = service.create_role(full_role_name.format(node_i), 'SERVER', host_a)
          # zookeeper needs a per-node ID in the configuration, so we set it now
          role.update_config({'serverId': node_i})
        elif service_name == 'KAFKA':
          role = service.create_role(full_role_name.format(node_i), role_name, host_a)
          # kafka needs a per-node ID in the configuration, so we set it now
          role.update_config({'broker.id': node_i})
        else:
          service.create_role(full_role_name.format(node_i), role_name, host_a)

        module.exit_json(changed=True, msg='Added host to {0} role'.format(role_name))

    # deploy a service. just create it, don't do anything more
    # this is needed mainly when we have to set service properties before role deployment
    elif action_a == 'deploy_service':
      name_a = module.params.get('name', None)

      if not name_a in SERVICE_MAP:
        module.fail_json(msg='Unknown service: {0}'.format(name_a))
      service_name = SERVICE_MAP[name_a]
      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
        module.exit_json(changed=True, msg='{0} service created'.format(service_name))
      else:
        module.exit_json(changed=False, msg='{0} service already exists'.format(service_name))

    # deploy the base hdfs roles (the namenode and secondary)
    # this doesn't create the service, as at least one datanode should already be added!
    # the format also requires certain properties to be set before we run it
    elif action_a == 'deploy_hdfs_base':
      nn_host_a = module.params.get('nn_host', None)
      sn_host_a = module.params.get('sn_host', None)

      changed = False

      hdfs = cluster.get_service('HDFS')
      hdfs_roles = [x.name for x in hdfs.get_all_roles()]

      # don't create a secondary namenode when:
      #- there is one that already exists
      #- there is a second namenode, which means we have HA and don't need a secondary
      if not 'HDFS-SECONDARYNAMENODE' in hdfs_roles and not 'HDFS-NAMENODE-2' in hdfs_roles:
        hdfs.create_role('HDFS-SECONDARYNAMENODE', 'SECONDARYNAMENODE', sn_host_a)
        changed = True

      # create a namenode and format its FS
      # formatting the namenode requires at least one datanode and secondary namenode already in the cluster!
      if not 'HDFS-NAMENODE' in hdfs_roles:
        hdfs.create_role('HDFS-NAMENODE', 'NAMENODE', nn_host_a)
        for command in hdfs.format_hdfs('HDFS-NAMENODE'):
          if command.wait().success == False:
            module.fail_json(msg='Failed formatting HDFS namenode with error: {0}'.format(command.resultMessage))
        changed = True

      module.exit_json(changed=changed, msg='Created HDFS service & NN roles')

    # enable HttpFS for HDFS
    # HUE requires this to support HA in HDFS
    elif action_a == 'deploy_hdfs_httpfs':
      host_a = module.params.get('host', None)
      
      hdfs = cluster.get_service('HDFS')
      hdfs_roles = [x.name for x in hdfs.get_all_roles()]
      
      # don't install second instance of HttpFS
      if len([role for role in hdfs_roles if 'HDFS-HTTPFS' in role]) != 0:
        module.exit_json(changed=False, msg='HDFS HttpFS service already exists')
       
      hdfs.create_role('HDFS-HTTPFS-1', 'HTTPFS', host_map[host_a]) 
        
      module.exit_json(changed=True, msg='HDFS HttpFS service created')
      
    # enable HA for HDFS
    # this deletes the secondary namenode and creates a second namenode in its place
    # also, this spawns 3 journalnode and 2 failover controller roles
    elif action_a == 'deploy_hdfs_ha':
      sn_host_a = module.params.get('sn_host', None)
      jn_dir_a = module.params.get('jn_dir', None)
      jn_names_a = [module.params.get('jn1_host', None), module.params.get('jn2_host', None), module.params.get('jn3_host', None)]

      hdfs = cluster.get_service('HDFS')

      # if there's a second namenode, this means we already have HA enabled
      if not 'HDFS-NAMENODE-2' in [x.name for x in hdfs.get_all_roles()]:
        # this is bad and I should feel bad
        # jns is a list of dictionaries, each dict passes the required journalnode parameters
        jns = [{'jnHostId': host_map[jn_name], 'jnEditsDir': jn_dir_a, 'jnName': 'HDFS-JOURNALNODE-{0}'.format(i + 1)} for i, jn_name in enumerate(jn_names_a)]

        # this call is so long because we set some predictable names for the services
        command = hdfs.enable_nn_ha('HDFS-NAMENODE', host_map[sn_host_a], 'nameservice1', jns, zk_service_name='ZOOKEEPER',
                                    active_fc_name='HDFS-FAILOVERCONTROLLER-1', standby_fc_name='HDFS-FAILOVERCONTROLLER-2', standby_name='HDFS-NAMENODE-2')

        children = command.wait().children
        for command_children in children:
          # The format command is expected to fail, since we already formatted the namenode
          if command_children.name != 'Format' and command_children.success == False:
            module.fail_json(msg='Command {0} failed when enabling HDFS HA with error {1}'.format(command_children.name, command_children.resultMessage))
        module.exit_json(changed=True, msg='Enabled HA for HDFS service')
      else:
        module.exit_json(changed=False, msg='HDFS HA already enabled')
    # enable HA for YARN
    elif action_a == 'deploy_rm_ha':
      sn_host_a = module.params.get('sn_host', None)

      yarn = cluster.get_service('YARN')

      # if there is exactly one resourcemanager role, HA for YARN is not enabled yet, so enable it
      if len([0 for x in yarn.get_all_roles() if match('^YARN-RESOURCEMANAGER.*$', x.name) != None]) == 1:
        command = yarn.enable_rm_ha(sn_host_a, zk_service_name='ZOOKEEPER')
        children = command.wait().children
        for command_children in children:
          if command_children.success == False:
            module.fail_json(msg='Command {0} failed when enabling YARN HA with error {1}'.format(command_children.name, command_children.resultMessage))
        module.exit_json(changed=True, msg='Enabled HA for YARN service')
      else:
        module.exit_json(changed=False, msg='YARN HA already enabled')

    # deploy the base roles for a service, according to BASE_SERVICE_ROLE_MAP
    # after the deployment, run the init commands specified in SERVICE_INIT_COMMANDS
    elif action_a == 'deploy_base_roles':
      host_a = module.params.get('host', None)
      service_a = module.params.get('service', None)

      service_name = SERVICE_MAP[service_a]
      changed = False

      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
      else:
        service = cluster.get_service(service_name)

      service_roles = [x.name for x in service.get_all_roles()]

      # create each service from the map
      for (role_name, cloudera_name) in BASE_SERVICE_ROLE_MAP[service_a].items():
        # check if the role already exists; the script can't compare names directly
        # because after enabling HA on YARN, roles will have random strings in their names
        if len([0 for x in service_roles if match(role_name, x) != None]) == 0:
          service.create_role(role_name, cloudera_name, host_a)
          changed = True

          # init commands
          if role_name in SERVICE_INIT_COMMANDS.keys():
            for command_to_run in SERVICE_INIT_COMMANDS[role_name]:
              # different handling of commands specified by name and
              # ones specified by an instance method
              if ismethod(command_to_run):
                command = command_to_run(service)
              else:
                command = service.service_command_by_name(command_to_run)

              if command.wait().success == False:
                module.fail_json(msg='Running {0} failed with {1}'.format(command_to_run, command.resultMessage))

      if changed == True:
        module.exit_json(changed=True, msg='Created base roles for {0}'.format(service_name))
      else:
        module.exit_json(changed=False, msg='{0} base roles already exist'.format(service_name))

    # deploy configuration - it always returns changed
    elif action_a == 'deploy_configuration':
      service_a = module.params.get('service', None)
      service_name = SERVICE_MAP[service_a]
      service = cluster.get_service(service_name)

      # deploying client configuration
      command = service.deploy_client_config()
      if command.wait().success == False:
        module.fail_json(msg='Deploying client config failed with {0}'.format(command.resultMessage))
      module.exit_json(changed=True, msg='Configuration deployed')
        
    # set config values for a given service/role
    elif action_a == 'set_config':
      entity_a = module.params.get('entity', None)
      service_a = module.params.get('service', None)
      role_a = module.params.get('role', None)
      name_a = module.params.get('name', None)
      value_a = module.params.get('value', None)

      if not service_a in SERVICE_MAP:
        module.fail_json(msg='Unknown service: {0}'.format(service_a))

      # since management is handled differently, it needs a different service
      if service_a == 'management':
        service = manager.get_service()
      elif service_a == 'cm':
        service = manager
      else:
        service = cluster.get_service(SERVICE_MAP[service_a])

      # role and service configs are handled differently
      if entity_a == 'service':
        prev_config = service.get_config()
        curr_config = service.update_config({name_a: value_a})
        if service_a == 'cm':
          prev_config = [prev_config]
          curr_config = [curr_config]
        module.exit_json(changed=(str(prev_config[0]) != str(curr_config[0])), msg='Config value for {0}: {1}'.format(name_a, curr_config[0][name_a]))

      elif entity_a == 'role':
        if not role_a in ROLE_MAP:
          module.fail_json(msg='Unknown role: {0}'.format(role_a))

        role = service.get_role_config_group(ROLE_MAP[role_a])
        prev_config = role.get_config()
        curr_config = role.update_config({name_a: value_a})
        module.exit_json(changed=(str(prev_config) != str(curr_config)), msg='Config value for {0}: {1}'.format(name_a, curr_config[name_a]))

      else:
        module.fail_json(msg='Invalid entity, must be one of service, role')

    # handle service state
    # currently this can only start/restart a service
    elif action_a == 'service':
      state_a = module.params.get('state', None)
      service_a = module.params.get('service', None)

      try:
        if service_a == 'cm':
          service = manager.get_service()
        else:
          service = cluster.get_service(SERVICE_MAP[service_a])
      except ApiException:
        module.fail_json(msg='Service does not exist')

      # when starting a service, we also deploy the client config for it
      if state_a == 'started':
        if service.serviceState == 'STARTED':
          module.exit_json(changed=False, msg='Service already running')
        method = service.start
        verb = "start"
      elif state_a == 'restarted':
        method = service.restart
        verb = "restart"

      try:
        command = service.deploy_client_config()
        if command.wait().success == False:
          module.fail_json(msg='Deploying client config failed with {0}'.format(command.resultMessage))
      # since there is no way to check if a service handles client config deployments
      # we try our best and pass the exception if it doesn't
      except (ApiException, AttributeError):
        pass

      method().wait()
      # we need to wait for cloudera checks to complete...
      # otherwise it will report as failing
      sleep(10)
      for i in range(24):
        sleep(10)
        service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
        if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
          break
      service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
      if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
        module.exit_json(changed=True, msg='Service {0} successful'.format(verb))
      else:
        module.fail_json(msg='Service {0} failed'.format(verb))

    # handle cluster
    # currently this can only restart the cluster
    elif action_a == 'cluster':
      state_a = module.params.get('state', None)

      if state_a == 'restarted':
        command = cluster.restart(redeploy_client_configuration=True)
        if command.wait().success == False:
          module.fail_json(msg='Cluster restart failed with {0}'.format(command.resultMessage))
        else:
          module.exit_json(changed=True, msg='Cluster restart successful')

    # Snapshot policy
    # only create is supported
    elif action_a == 'create_snapshot_policy':
      name_a = module.params.get('name', None)
      value_a = module.params.get('value', None)
      service_a = module.params.get('service', None)
      service = cluster.get_service(SERVICE_MAP[service_a])
      payload=loads(value_a)
      # check if the policy already exists; an exception is expected when configuring for the first time.
      try: 
        test = service.get_snapshot_policy(name_a)
        module.exit_json(changed=False, msg='Defined policy already exists')
      except ApiException:
        pass
      try:
        command = service.create_snapshot_policy(payload)
        module.exit_json(changed=True, msg='Snapshot policy was created.')
      except (ApiException, AttributeError):
        module.fail_json(msg='ERROR in creating snapshot policy.')
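
The deploy_parcel action above repeats the same poll-and-sleep loop for every parcel stage. A minimal sketch of how that pattern could be factored into a single helper is shown below; the helper name, the poll interval and the retry limit are assumptions rather than part of the original module, and it relies on the same cm_api calls (cluster.get_parcel, parcel.stage, parcel.state.errors) and the sleep import already used above.

# Hypothetical helper, not part of the original module: poll a parcel until it
# reaches the wanted stage, failing on parcel errors or on timeout.
def wait_for_parcel_stage(cluster, name, version, wanted_stage, poll_sec=10, max_polls=360):
  for _ in range(max_polls):
    parcel = cluster.get_parcel(name, version)
    if parcel.state.errors:
      raise Exception(str(parcel.state.errors))
    if parcel.stage == wanted_stage:
      return parcel
    sleep(poll_sec)
  raise Exception('Parcel did not reach stage {0} within the allotted time'.format(wanted_stage))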
Ejemplo n.º 32
hue_db_user = config.get('CLOUDERA_PROPERTIES', 'hue_db_user')
oozie_db_host = config.get('CLOUDERA_PROPERTIES', 'oozie_db_host')
oozie_db_name = config.get('CLOUDERA_PROPERTIES', 'oozie_db_name')
oozie_db_password = str(sys.argv[4])
oozie_db_user = config.get('CLOUDERA_PROPERTIES', 'oozie_db_user')
api_version = config.get('CLOUDERA_PROPERTIES', 'api_version')

# Get Cloudera Manager, config, and ODP Cluster
logging.info('Retrieving Cloudera Manager service and cluster instance')
api = ApiResource(cloudera_manager_server_api,
                  7180,
                  management_console_username,
                  management_console_password,
                  version=api_version)
cloudera_manager = ClouderaManager(api)
cloudera_manager_config = api.get_cloudera_manager().get_config(view='full')
cluster_name = 'Open Data Platform'
cluster = api.get_cluster(cluster_name)

# Retrieve all ApiHost objects, locate the management server and add others to clients
logging.info('Retrieving all hosts from cluster')
hosts = api.get_all_hosts()
clients = []
for host in hosts:
    # Suppress Clock Offset warning that incorrectly states chrony is not working
    host.update_config({'host_health_suppression_host_clock_offset': 'true'})

    # Separate Cloudera Manager Server from agents
    if host.hostname == cloudera_management_server_fqdn:
        cloudera_management_server = host
    else:
        clients.append(host)
Ejemplo n.º 33
def install_java_8(region, stack_name):
    # following general protocol for upgrading to JDK 1.8 here:
    # http://www.cloudera.com/content/cloudera/en/documentation/core/v5-3-x/topics/cdh_cm_upgrading_to_jdk8.html
    ec2_conn = create_ec2_connection(region)
    manager_instance = get_manager_instance(ec2_conn, stack_name)
    cluster_instances = (
        get_worker_instances(ec2_conn, stack_name) +
        [manager_instance, get_master_instance(ec2_conn, stack_name)])
    cluster_hosts = [i.ip_address for i in cluster_instances]

    with cm_tunnel_ctx(manager_instance) as local_port:
        # Connect to CM API
        cm_api = ApiResource('localhost', username='******', password='******',
                             server_port=local_port, version=9)
        cloudera_manager = cm_api.get_cloudera_manager()

        # Stop Cloudera Management Service
        print "Stopping Cloudera Management Service"
        mgmt_service = cloudera_manager.get_service()
        mgmt_service.stop().wait()

        # Stop cluster
        print "Stopping the cluster"
        clusters = cm_api.get_all_clusters()
        cluster = clusters.objects[0]
        cluster.stop().wait()

    # Stop all Cloudera Manager Agents
    @parallel
    def stop_cm_agents():
        sudo('service cloudera-scm-agent stop')
    execute(stop_cm_agents, hosts=cluster_hosts)

    # Stop the Cloudera Manager Server
    def stop_cm_server():
        sudo('service cloudera-scm-server stop')
    execute(stop_cm_server, hosts=[manager_instance.ip_address])

    # Cleanup other Java versions and install JDK 1.8
    @parallel
    def swap_jdks():
        sudo('rpm -qa | grep jdk | xargs rpm -e')
        sudo('rm -rf /usr/java/jdk1.6*')
        sudo('rm -rf /usr/java/jdk1.7*')
        run('wget -O jdk-8-linux-x64.rpm --no-cookies --no-check-certificate '
            '--header "Cookie: oraclelicense=accept-securebackup-cookie" '
            'http://download.oracle.com/otn-pub/java/jdk/8u51-b16/'
            'jdk-8u51-linux-x64.rpm')
        sudo('yum install -y jdk-8-linux-x64.rpm')
        append('/home/ec2-user/.bash_profile',
               'export JAVA_HOME=`find /usr/java -name "jdk1.8*"`')
    execute(swap_jdks, hosts=cluster_hosts)

    # Start the Cloudera Manager Server
    def start_cm_server():
        sudo('service cloudera-scm-server start')
    execute(start_cm_server, hosts=[manager_instance.ip_address])

    # Start all Cloudera Manager Agents
    @parallel
    def start_cm_agents():
        sudo('service cloudera-scm-agent start')
    execute(start_cm_agents, hosts=cluster_hosts)

    with cm_tunnel_ctx(manager_instance) as local_port:
        # Connect to CM API
        cm_api = ApiResource('localhost', username='******', password='******',
                             server_port=local_port, version=9)
        cloudera_manager = cm_api.get_cloudera_manager()

        # Start the cluster and the mgmt service
        print "Starting the cluster"
        cluster.start().wait()
        print "Starting the Cloudera Management Service"
        cloudera_manager = cm_api.get_cloudera_manager()
        mgmt_service = cloudera_manager.get_service()
        mgmt_service.start().wait()
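
A minimal usage sketch for the function above; the region and stack name are placeholder values, and the EC2/fabric helpers it calls (create_ec2_connection, get_manager_instance, cm_tunnel_ctx and friends) are assumed to live in the same module as install_java_8.

# Hypothetical invocation with placeholder values; real region/stack names depend on the deployment.
if __name__ == '__main__':
    install_java_8('us-east-1', 'my-cdh-stack')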
Ejemplo n.º 34
class DeployCloudEraCluster(object):
    """
    This class defines and sets up the base properties of the cluster nodes for the Hadoop ecosystem
    """
    _cloudera_manager_host = None
    _port_number = None
    _user_name = None
    _password = None
    _version = 12

    def __init__(self, cloudera_manager_host, port_number, user_name, password,
                 version):
        """
        Initialize the object used to provision the cluster nodes for the Hadoop parcel-based provisioning
        :param cloudera_manager_host:
        :param port_number:
        :param user_name:
        :param password:
        :param version:
        """
        self._cloudera_manager_host = cloudera_manager_host
        self._port_number = port_number
        self._user_name = user_name
        self._password = password
        self._version = version  # The API version varies depending on the task: v1 is enough to inspect the cluster, while v12 is needed to export the cluster configuration
        self._cloudera_manager_oconnect = ApiResource(
            self._cloudera_manager_host,
            self._port_number,
            self._user_name,
            self._password,
            version=self._version)

    def get_cluster_versions(self):
        """
        To get all the provisioned cluster versions registered with Cloudera Manager
        :return:
        """
        for cluster in self._cloudera_manager_oconnect.get_all_clusters():
            print("%s = %s" % (cluster.name, cluster.version))
        return cluster

    def get_cluster_services(self, cdh_version):
        """
        To get all the provisioned cluster services against the specific cluster
        :return:
        """
        for srv in cdh_version.get_all_services():
            print srv
            if srv.type == "HDFS":
                hdfs = srv
                print hdfs.name, hdfs.serviceState, hdfs.healthSummary
                print hdfs.serviceUrl
                for chk in hdfs.healthChecks:
                    print "%s --- %s" % (chk['name'], chk['summary'])

    def get_cluster_roles_info(self, cdh_version):
        """
        To get the details of all the roles for each cluster node
        :return:
        """
        for role in cdh_version.get_all_roles():
            if role.type == 'NAMENODE':
                namenode = role
        print "Role name: %s\nState: %s\nHealth: %s\nHost: %s" % (
            namenode.name, namenode.roleState, namenode.healthSummary,
            namenode.hostRef.hostId)

    def get_cdh_metrics_details(self, cdh_version):
        """
        To get the CDH metrics containing details about all the activities in the cluster node
        :param cdh_version:
        :return:
        """
        metrics = cdh_version.get_metrics()
        for metric in metrics:
            print "%s (%s)" % (metric.name, metric.unit)

    def start_service(self, cdh_service_name):
        """
        To restart the CDH service and wait for the command to finish
        :param cdh_service_name:
        :return:
        """
        service = cdh_service_name.restart()
        print service.active

        service_status = service.wait()
        print "Active: %s. Success: %s" % (service_status.active,
                                           service_status.success)

    def restart_service(self, cdh_service_name, namenode):
        """
        To restart the service of the specific role
        :param cdh_service_name:
        :param namenode:
        :return:
        """
        commands = cdh_service_name.restart_roles(namenode.name)
        for command in commands:
            print command

    def configure_services(self, cdh_service_name):
        """
        To configure the specific services with available roles
        :return:
        """
        for name, config in cdh_service_name.get_config(
                view='full')[0].items():
            print "%s - %s - %s" % (name, config.relatedName,
                                    config.description)

    def export_cluster_template(self, template_filename, cluster_name):
        """
        To export the current cluster configuration into the given file.
        :param template_filename:
        :return:
        """
        cluster = self._cloudera_manager_oconnect.get_cluster(cluster_name)
        cdh_template = cluster.export()
        with open(template_filename, 'w') as outfile:
            json.dump(cdh_template.to_json_dict(),
                      outfile,
                      indent=4,
                      sort_keys=True)

    def import_cluster_template(self, template_filename, cluster_name):
        """
        To import cluster template configuration into given cluster
        :param template_filename:
        :param cluster_name:
        :return:
        """
        cluster = self._cloudera_manager_oconnect.get_cluster(cluster_name)
        with open(template_filename) as data_file:
            data = json.load(data_file)
        template = ApiClusterTemplate(cluster).from_json_dict(data, cluster)
        cms = ClouderaManager(cluster)
        command = cms.import_cluster_template(template)
        print(command)

    def deploy_cloudera_manager_services(self):
        """
        To deploy the cloudera manager services
        :return:
        """
        varEnableConfigAlerts = True
        varServiceGroupName = "cloudera-scm"
        varServiceUserName = "******"
        varMgmtServiceConfig = {
            'enable_config_alerts': varEnableConfigAlerts,
            'process_groupname': varServiceGroupName,
            'process_username': varServiceUserName,
        }
        varManager = self._cloudera_manager_oconnect.get_cloudera_manager()
        varMgmt = varManager.create_mgmt_service(ApiServiceSetupInfo())

        # update the cloudera service config
        varMgmt.update_config(varMgmtServiceConfig)

        # Get the cloudera services configured
        services = varManager.get_service()

        varMgmt.create_role("ACTIVITYMONITOR-1", "ACTIVITYMONITOR",
                            self._cloudera_manager_host)
        varMgmt.create_role("ALERTPUBLISHER-1", "ALERTPUBLISHER",
                            self._cloudera_manager_host)
        varMgmt.create_role("EVENTSERVER-1", "EVENTSERVER",
                            self._cloudera_manager_host)
        varMgmt.create_role("HOSTMONITOR-1", "HOSTMONITOR",
                            self._cloudera_manager_host)
        varMgmt.create_role("SERVICEMONITOR-1", "SERVICEMONITOR",
                            self._cloudera_manager_host)
        varMgmt.create_role("REPORTSMANAGER-1", "REPORTSMANAGER",
                            self._cloudera_manager_host)

    def deploy_activity_monitor(self):
        """
        To deploy the Activity monitor services
        :return:
        """
        varActivityMonitorPassword = "******"

        varMgmt = self._cloudera_manager_oconnect.get_service()

        # config for the activity monitoring
        varActivityMonitorConfig = {
            'firehose_database_host':
            "pocd-cm581-dev-manager.poc-d.internal" + ":" + "7432",
            'firehose_database_user':
            "******",
            'firehose_database_password':
            varActivityMonitorPassword,
            'firehose_database_type':
            "postgresql",
            'firehose_database_name':
            "amon",
            'firehose_heapsize':
            268435456,
            'mgmt_log_dir':
            "/opt/cloudera/log/cloudera-scm-firehose",
            'oom_heap_dump_dir':
            "/tmp",
            'oom_heap_dump_enabled':
            False,
            'max_log_backup_index':
            10,
            'max_log_size':
            100,
            'log_threshold':
            "INFO",
            'enable_config_alerts':
            "true",
        }
        varRole = varMgmt.get_role("ACTIVITYMONITOR-1")
        varRole.update_config(varActivityMonitorConfig)

    def deploy_alert_publisher(self):
        """
        To deploy the alert publisher
        :return:
        """
        varMgmt = self._cloudera_manager_oconnect.get_service()
        varAlertPublisherConfig = {
            'alert_heapsize': 268435456,
            'mgmt_log_dir': "/opt/cloudera/log/cloudera-scm-alertpublisher",
            'oom_heap_dump_dir': "/tmp",
            'oom_heap_dump_enabled': False,
            'max_log_backup_index': 10,
            'max_log_size': 100,
            'log_threshold': "INFO",
            'enable_config_alerts': True,
        }
        varRole = varMgmt.get_role("ALERTPUBLISHER-1")
        varRole.update_config(varAlertPublisherConfig)

    def deploy_event_server(self):
        """
        To deploy event server
        :return:
        """
        varMgmt = self._cloudera_manager_oconnect.get_service()
        varEventServerConfig = {
            'event_server_heapsize': 268435456,
            'mgmt_log_dir': "/opt/cloudera/log/cloudera-scm-eventserver",
            'eventserver_index_dir':
            "/opt/cloudera/lib/cloudera-scm-eventserver",
            'oom_heap_dump_dir': "/tmp",
            'oom_heap_dump_enabled': False,
            'max_log_backup_index': 10,
            'max_log_size': 100,
            'log_threshold': "INFO",
            'enable_config_alerts': True,
        }
        varRole = varMgmt.get_role("EVENTSERVER-1")
        varRole.update_config(varEventServerConfig)

    def deploy_host_monitor(self):
        """
        To deploy host monitor
        :return:
        """
        varMgmt = self._cloudera_manager_oconnect.get_service()
        varHostMonitorConfig = {
            'firehose_heapsize': 268435456,
            'mgmt_log_dir': "/opt/cloudera/log/cloudera-scm-firehose",
            'firehose_storage_dir': "/opt/cloudera/lib/cloudera-host-monitor",
            'oom_heap_dump_dir': "/tmp",
            'oom_heap_dump_enabled': False,
            'max_log_backup_index': 10,
            'max_log_size': 100,
            'log_threshold': "INFO",
            'enable_config_alerts': True,
        }
        varRole = varMgmt.get_role("HOSTMONITOR-1")
        varRole.update_config(varHostMonitorConfig)

    def deploy_service_monitor(self):
        """
        To deploy the service monitor
        :return:
        """
        varMgmt = self._cloudera_manager_oconnect.get_service()
        varServiceMonitorConfig = {
            'firehose_heapsize': 268435456,
            'mgmt_log_dir': "/opt/cloudera/log/cloudera-scm-firehose",
            'firehose_storage_dir':
            "/opt/cloudera/lib/cloudera-service-monitor",
            'oom_heap_dump_dir': "/tmp",
            'oom_heap_dump_enabled': False,
            'max_log_backup_index': 10,
            'max_log_size': 100,
            'log_threshold': "INFO",
            'enable_config_alerts': True,
        }

        varRole = varMgmt.get_role("SERVICEMONITOR-1")
        varRole.update_config(varServiceMonitorConfig)

    def deploy_report_manager(self):
        """
        To deploy the service Report Manager
        :return:
        """
        varReportManagerPassword = "******"
        varMgmt = self._cloudera_manager_oconnect.get_service()
        varReportManagerConfig = {
            'headlamp_database_host':
            "pocd-cm581-dev-manager.poc-d.internal" + ":" + "7432",
            'headlamp_database_user':
            "******",
            'headlamp_database_password':
            varReportManagerPassword,
            'headlamp_database_type':
            "postgresql",
            'headlamp_database_name':
            "rman",
            'headlamp_heapsize':
            536870912,
            'mgmt_log_dir':
            "/opt/cloudera/log/cloudera-scm-headlamp",
            'headlamp_scratch_dir':
            "/opt/cloudera/lib/cloudera-scm-headlamp",
            'oom_heap_dump_dir':
            "/tmp",
            'oom_heap_dump_enabled':
            False,
            'max_log_backup_index':
            10,
            'max_log_size':
            100,
            'log_threshold':
            "INFO",
            'enable_config_alerts':
            True,
        }
        varRole = varMgmt.get_role("REPORTSMANAGER-1")
        varRole.update_config(varReportManagerConfig)

    def deploy_services(self):
        """
        To deploy all the cloudera manager services
        :return:
        """
        varMgmt = self._cloudera_manager_oconnect.get_service()
        varMgmt.start().wait()

    def create_hadoop_cluster(self):
        """
        To create hadoop cluster with multiple data and name nodes and configure different services
        :return:
        """
        varClusterName = "POC-D Cluster"
        varCDHVersion = "CDH5"
        varCDHFullVersion = "5.8.0"

        varCluster = self._cloudera_manager_oconnect.create_cluster(
            varClusterName, varCDHVersion, varCDHFullVersion)
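
A short, hedged usage sketch for the class above; the host name, credentials and output path are placeholders, and only methods defined on DeployCloudEraCluster are called.

# Hypothetical driver code; host, credentials, API version and file path are placeholders.
deployer = DeployCloudEraCluster('cm-host.example.com', 7180, 'admin', 'admin', 12)
deployer.deploy_cloudera_manager_services()
deployer.deploy_services()
deployer.export_cluster_template('/tmp/cluster_template.json', 'POC-D Cluster')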
Ejemplo n.º 35
def do_call(user, password, man_host, man_port, cluster_name, parcel_name,
            parcel_version, parcel_repo, init_pre_dir, init_post_dir):
    api = ApiResource(man_host, man_port, user, password, False,
                      MAN_API_VERSION)
    if not parcel_repo.endswith('/'):
        parcel_repo += '/'
    if re.match(REGEX_VERSION, parcel_version) is None or re.match(
            REGEX_VERSION, parcel_version).group() != parcel_version:
        raise Exception('Parcel [' + parcel_name +
                        '] is qualified by invalid version [' +
                        parcel_version +
                        '] expected to match regular expression [' +
                        REGEX_VERSION + ']')
    if not parcel_repo.endswith(parcel_version + '/'):
        raise Exception('Parcel [' + parcel_name +
                        '] is qualified by invalid version [' +
                        parcel_version + '] when compared with repository [' +
                        parcel_repo + ']')
    cm_config = api.get_cloudera_manager().get_config(view='full')
    repo_config = cm_config['REMOTE_PARCEL_REPO_URLS']
    repo_list = repo_config.value or repo_config.default
    if parcel_repo not in repo_list:
        repo_list += ',' + parcel_repo
        api.get_cloudera_manager().update_config(
            {'REMOTE_PARCEL_REPO_URLS': repo_list})
        time.sleep(
            POLL_SEC
        )  # The parcel synchronize end-point is not exposed via the API, so sleep instead
    cluster_names = []
    if cluster_name is None:
        for cluster in api.get_all_clusters():
            cluster_names.append(cluster.name)
    else:
        cluster_names.append(cluster_name)
    for cluster_name_itr in cluster_names:
        print 'Cluster [DEPLOYMENT] starting ... '
        cluster = api.get_cluster(cluster_name_itr)
        parcel = cluster.get_parcel(parcel_name, parcel_version)
        parcel_already_activated = False
        print 'Parcel [DEPLOYMENT] starting ... '
        if parcel.stage == 'ACTIVATED':
            parcel_already_activated = True
            print 'Parcel [DEPLOYMENT] already deployed'
        else:
            do_parcel_op(cluster, parcel_name, parcel_version, 'DOWNLOAD',
                         'AVAILABLE_REMOTELY', 'DOWNLOADED', 'start_download')
            do_parcel_op(cluster, parcel_name, parcel_version, 'DISTRIBUTE',
                         'DOWNLOADED', 'DISTRIBUTED', 'start_distribution')
            do_parcel_op(cluster, parcel_name, parcel_version, 'ACTIVATE',
                         'DISTRIBUTED', 'ACTIVATED', 'activate')
            parcel = cluster.get_parcel(parcel_name, parcel_version)
            if parcel.stage != 'ACTIVATED':
                raise Exception('Parcel is currently mid-stage [' +
                                parcel.stage +
                                '], please wait for this to complete')
        print 'Parcel [DEPLOYMENT] finished'
        if init_pre_dir is not None and os.path.isdir(init_pre_dir):
            print 'Cluster [PRE_INIT] starting ... '
            for script in glob.glob(init_pre_dir + '/*.sh'):
                subprocess.call([script])
            print 'Cluster [PRE_INIT] finished'
        if not parcel_already_activated:
            print 'Cluster [CONFIG_DEPLOYMENT] starting ... '
            cluster.deploy_client_config()
            cmd = cluster.deploy_client_config()
            if not cmd.wait(TIMEOUT_SEC).success:
                raise Exception('Failed to deploy client configs')
            print 'Cluster [CONFIG_DEPLOYMENT] finished'
            print 'Cluster [RESTART] starting ... '
            for service in cluster.get_all_services():
                if service.type == 'FLUME':
                    service.restart().wait()
                if service.type == 'HIVE':
                    service.restart().wait()
                if service.type == 'YARN':
                    service.restart().wait()
            print 'Cluster [RESTART] finished'
        if init_post_dir is not None and os.path.isdir(init_post_dir):
            print 'Cluster [POST_INIT] starting ... '
            for script in glob.glob(init_post_dir + '/*.sh'):
                subprocess.call([script])
            print 'Cluster [POST_INIT] finished'
        print 'Cluster [DEPLOYMENT] finished'
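
A minimal sketch of how do_call above might be invoked; every argument value is a placeholder, the parcel repository is assumed to end with the parcel version as the function requires, and the module-level constants (MAN_API_VERSION, POLL_SEC, TIMEOUT_SEC, REGEX_VERSION) are assumed to be defined alongside the function.

# Hypothetical invocation with placeholder values; cluster_name=None means all clusters.
if __name__ == '__main__':
    do_call('admin', 'admin', 'cm-host.example.com', 7180, None,
            'MYPARCEL', '1.0.0', 'http://repo.example.com/myparcel/1.0.0/',
            '/opt/init/pre', '/opt/init/post')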
Ejemplo n.º 36
#!/usr/bin/env python
#Author: Pratap Raj
#Purpose: Start Cloudera Management services

import sys
import socket
from cm_api.api_client import ApiResource
from cm_api.endpoints.cms import ClouderaManager

#########
# Do not edit any system variables here. They are all passed from the startstopcluster.sh script, so make changes there.
cmhost = str(sys.argv[1])
cmport = str(sys.argv[2])
cmusername = str(sys.argv[3])
cmpassword = str(sys.argv[4])
tlspref = str(sys.argv[5])
#########

api = ApiResource(cmhost,
                  server_port=cmport,
                  username=cmusername,
                  password=cmpassword,
                  use_tls=tlspref)

mgmt = api.get_cloudera_manager().get_service()
cmstartstatus = mgmt.start().wait()
print cmstartstatus.success
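
The script above only starts the Cloudera Management Service. A matching stop, sketched here under the assumption that it reuses the same ApiResource handle built above, would call stop() on the same service object:

# Hypothetical companion snippet: stop the management service with the same API handle.
mgmt = api.get_cloudera_manager().get_service()
cmstopstatus = mgmt.stop().wait()
print cmstopstatus.success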
Ejemplo n.º 37
def main():
  module = AnsibleModule(argument_spec=dict((argument, {'type': 'str'}) for argument in MODULE_ARGUMENTS))

  api = ApiResource('localhost', username=ADMIN_USER, password=ADMIN_PASS, version=10)
  cluster_name = CLUSTER_NAME

  manager = api.get_cloudera_manager()

  action_a = module.params.get('action', None)

  if action_a == 'create_cluster':
    license_a = module.params.get('license', None)
    version_a = module.params.get('version', None)

    cluster_list = [x.name for x in api.get_all_clusters()]
    if cluster_name in cluster_list:
      module.exit_json(changed=False, msg='Cluster exists')
    else:
      cluster = api.create_cluster(CLUSTER_NAME, fullVersion=version_a)
      if license_a == None:
        manager.begin_trial()
      else:
        manager.update_license(license_a.decode('base64'))
      module.exit_json(changed=True, msg='Cluster created')
  elif action_a in ['add_host', 'create_mgmt', 'deploy_parcel', 'deploy_hdfs_base', 'deploy_hdfs_httpfs', 'deploy_hdfs_dn', 'deploy_hdfs_ha', 'deploy_rm_ha', 'set_config', 'service', 'deploy_service', 'deploy_service_worker_nodes', 'deploy_base_roles', 'run_command', 'cluster','create_snapshot_policy']:
    # more complicated actions that need a created cluster go here
    cluster = api.get_cluster(cluster_name)
    host_map = dict((api.get_host(x.hostId).hostname, x.hostId) for x in cluster.list_hosts())

    # adds a host to the cluster
    # host_name should be in the internal DNS format, ip-xx-xx-xx.compute.internal
    if action_a == 'add_host':
      host_a = module.params.get('host', None)

      host_list = host_map.keys()
      if host_a in host_list:
        module.exit_json(changed=False, msg='Host already in cluster')
      else:
        try:
          cluster.add_hosts([host_a])
        except ApiException:
          # if a host isn't there, it could be because the agent didn't manage to connect yet
          # so let's wait a moment for it
          sleep(120)
          cluster.add_hosts([host_a])

        module.exit_json(changed=True, msg='Host added')

    # create the management service and set its basic configuration
    # this needs a separate function since management is handled
    # differently than the rest of services
    elif action_a == 'create_mgmt':
      host_a = module.params.get('host', None)

      # getting the management service is the only way to check if mgmt exists
      # an exception means there isn't one
      try:
        mgmt = manager.get_service()
        module.exit_json(changed=False, msg='Mgmt service already exists')
      except ApiException:
        pass

      mgmt = manager.create_mgmt_service(ApiServiceSetupInfo())

      # this is ugly... and I see no good way to unuglify it
      firehose_passwd = Popen("sudo grep com.cloudera.cmf.ACTIVITYMONITOR.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")
      reports_passwd = Popen("sudo grep com.cloudera.cmf.REPORTSMANAGER.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")

      # since there is no easy way of configuring the manager... let's do it here :(
      role_conf = defaultdict(dict)
      role_conf['ACTIVITYMONITOR'] = {
          'firehose_database_host': '{0}:7432'.format(host_a),
          'firehose_database_user': '******',
          'firehose_database_password': firehose_passwd,
          'firehose_database_type': 'postgresql',
          'firehose_database_name': 'amon',
          'firehose_heapsize': '268435456',
      }
      role_conf['EVENTSERVER'] = {
          'event_server_heapsize': '215964392'
      }
      role_conf['REPORTSMANAGER'] = {
          'headlamp_database_host': '{0}:7432'.format(host_a),
          'headlamp_database_user': '******',
          'headlamp_database_password': reports_passwd,
          'headlamp_database_type': 'postgresql',
          'headlamp_database_name': 'rman',
          'headlamp_heapsize': '215964392',
      }

      roles = ['ACTIVITYMONITOR', 'ALERTPUBLISHER', 'EVENTSERVER', 'HOSTMONITOR', 'SERVICEMONITOR', 'REPORTSMANAGER']
      # create management roles
      for role in roles:
        mgmt.create_role('{0}-1'.format(role), role, host_map[host_a])

      # update configuration of each
      for group in mgmt.get_all_role_config_groups():
        group.update_config(role_conf[group.roleType])

      mgmt.start().wait()
      # after starting this service needs time to spin up
      sleep(30)
      module.exit_json(changed=True, msg='Mgmt created and started')

    # deploy a given parcel on all hosts in the cluster
    # you can specify a substring of the version ending with latest, for example 5.3-latest instead of 5.3.5-1.cdh5.3.5.p0.4
    elif action_a == 'deploy_parcel':
      name_a = module.params.get('name', None)
      version_a = module.params.get('version', None)

      if "latest" in version_a:
        available_versions = [x.version for x in cluster.get_all_parcels() if x.product == name_a]
        if "-latest" in version_a:
          version_substr = match('(.+?)-latest', version_a).group(1)
        # if version is just "latest", try to check everything
        else:
          version_substr = ".*"
        try:
          [version_parcel] = [x for x in available_versions if re.match(version_substr, x) != None]
        except ValueError:
          module.fail_json(msg='Specified version {0} doesn\'t appear in {1} or appears twice'.format(version_substr, available_versions))
      else:
        version_parcel = version_a

      # we now go through various stages of getting the parcel
      # as there is no built-in way of waiting for an operation to complete
      # we use loops with sleep to get it done
      parcel = cluster.get_parcel(name_a, version_parcel)
      if parcel.stage == 'AVAILABLE_REMOTELY':
        parcel.start_download()

        while parcel.stage != 'DOWNLOADED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
          sleep(10)

      if parcel.stage == 'DOWNLOADED':
        parcel.start_distribution()

        while parcel.stage != 'DISTRIBUTED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
          # sleep while hosts report problems after the download
          for i in range(12):
            sleep(10)
            if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
              break

      # since parcels are distributed automatically when a new host is added to a cluster
      # we can encounter the 'ACTIVATING' stage then
      if parcel.stage == 'DISTRIBUTED' or parcel.stage == 'ACTIVATING':
        if parcel.stage == 'DISTRIBUTED':
          parcel.activate()

        while parcel.stage != 'ACTIVATED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          # this sleep has to be large because although the operation is very fast
          # it makes the management and cloudera hosts go bonkers, failing all of the health checks
          sleep(10)

        # sleep while hosts report problems after the distribution
        for i in range(60):
          sleep(10)
          if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
            break

        module.exit_json(changed=True, msg='Parcel activated')

      if parcel.stage == 'ACTIVATED':
        module.exit_json(changed=False, msg='Parcel already activated')

      # if we get down here, something is not right
      module.fail_json(msg='Invalid parcel state')

    # deploy nodes for workers, according to SERVICE_WORKER_MAP
    # also give them sane names and init the zookeeper and kafka ones,
    # which need IDs specified
    elif action_a == 'deploy_service_worker_nodes':
      host_a = module.params.get('host', None)
      service_a = module.params.get('service', None)

      service_name = SERVICE_MAP[service_a]
      role_name = SERVICE_WORKER_MAP[service_a]['name']
      full_role_name = SERVICE_WORKER_MAP[service_a]['formatstring']

      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
      else:
        service = cluster.get_service(service_name)

      nodes = [x for x in service.get_all_roles() if role_name in x.name]

      # if host already has the given group, we should skip it
      if host_map[host_a] in [x.hostRef.hostId for x in nodes]:
        module.exit_json(changed=False, msg='Host already is a {0}'.format(role_name))
      # find out the highest id that currently exists
      else:
        node_names = [x.name for x in nodes]
        if len(node_names) == 0:
          # if no nodes, start numbering from 1
          node_i = 1
        else:
          # take the max number and add 1 to it
          node_i = max([int(x.split('-')[-1]) for x in node_names]) + 1

        if service_name == 'ZOOKEEPER':
          role = service.create_role(full_role_name.format(node_i), 'SERVER', host_a)
          # zookeeper needs a per-node ID in the configuration, so we set it now
          role.update_config({'serverId': node_i})
        elif service_name == 'KAFKA':
          role = service.create_role(full_role_name.format(node_i), role_name, host_a)
          # kafka needs a per-node ID in the configuration, so we set it now
          role.update_config({'broker.id': node_i})
        else:
          service.create_role(full_role_name.format(node_i), role_name, host_a)

        module.exit_json(changed=True, msg='Added host to {0} role'.format(role_name))

    # deploy a service. just create it, don't do anything more
    # this is needed mainly when we have to set service properties before role deployment
    elif action_a == 'deploy_service':
      name_a = module.params.get('name', None)

      if not name_a in SERVICE_MAP:
        module.fail_json(msg='Unknown service: {0}'.format(name_a))
      service_name = SERVICE_MAP[name_a]
      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
        module.exit_json(changed=True, msg='{0} service created'.format(service_name))
      else:
        module.exit_json(changed=False, msg='{0} service already exists'.format(service_name))

    # deploy the base hdfs roles (the namenode and secondary)
    # this doesn't create the service, as at least one datanode should already be added!
    # the format also requires certain properties to be set before we run it
    elif action_a == 'deploy_hdfs_base':
      nn_host_a = module.params.get('nn_host', None)
      sn_host_a = module.params.get('sn_host', None)

      changed = False

      hdfs = cluster.get_service('HDFS')
      hdfs_roles = [x.name for x in hdfs.get_all_roles()]

      # don't create a secondary namenode when:
      #- there is one that already exists
      #- there is a second namenode, which means we have HA and don't need a secondary
      if not 'HDFS-SECONDARYNAMENODE' in hdfs_roles and not 'HDFS-NAMENODE-2' in hdfs_roles:
        hdfs.create_role('HDFS-SECONDARYNAMENODE', 'SECONDARYNAMENODE', sn_host_a)
        changed = True

      # create a namenode and format its FS
      # formatting the namenode requires at least one datanode and secondary namenode already in the cluster!
      if not 'HDFS-NAMENODE' in hdfs_roles:
        hdfs.create_role('HDFS-NAMENODE', 'NAMENODE', nn_host_a)
        for command in hdfs.format_hdfs('HDFS-NAMENODE'):
          if command.wait().success == False:
            module.fail_json(msg='Failed formatting HDFS namenode with error: {0}'.format(command.resultMessage))
        changed = True

      module.exit_json(changed=changed, msg='Created HDFS service & NN roles')

    # enable HttpFS for HDFS
    # HUE requires this to support HA in HDFS
    elif action_a == 'deploy_hdfs_httpfs':
      host_a = module.params.get('host', None)
      
      hdfs = cluster.get_service('HDFS')
      hdfs_roles = [x.name for x in hdfs.get_all_roles()]
      
      # don't install second instance of HttpFS
      if len([role for role in hdfs_roles if 'HDFS-HTTPFS' in role]) != 0:
        module.exit_json(changed=False, msg='HDFS HttpFS service already exists')
       
      hdfs.create_role('HDFS-HTTPFS-1', 'HTTPFS', host_map[host_a]) 
        
      module.exit_json(changed=True, msg='HDFS HttpFS service created')
      
    # enable HA for HDFS
    # this deletes the secondary namenode and creates a second namenode in its place
    # also, this spawns 3 journalnode and 2 failover controller roles
    elif action_a == 'deploy_hdfs_ha':
      sn_host_a = module.params.get('sn_host', None)
      jn_names_a = [module.params.get('jn1_host', None), module.params.get('jn2_host', None), module.params.get('jn3_host', None)]

      hdfs = cluster.get_service('HDFS')

      # if there's a second namenode, this means we already have HA enabled
      if not 'HDFS-NAMENODE-2' in [x.name for x in hdfs.get_all_roles()]:
        # this is bad and I should feel bad
        # jns is a list of dictionaries, each dict passes the required journalnode parameters
        jns = [{'jnHostId': host_map[jn_name], 'jnEditsDir': '/data0/hadoop/journal', 'jnName': 'HDFS-JOURNALNODE-{0}'.format(i + 1)} for i, jn_name in enumerate(jn_names_a)]

        # this call is so long because we set some predictable names for the services
        command = hdfs.enable_nn_ha('HDFS-NAMENODE', host_map[sn_host_a], 'nameservice1', jns, zk_service_name='ZOOKEEPER',
                                    active_fc_name='HDFS-FAILOVERCONTROLLER-1', standby_fc_name='HDFS-FAILOVERCONTROLLER-2', standby_name='HDFS-NAMENODE-2')

        children = command.wait().children
        for command_children in children:
          # The format command is expected to fail, since we already formatted the namenode
          if command_children.name != 'Format' and command_children.success == False:
            module.fail_json(msg='Command {0} failed when enabling HDFS HA with error {1}'.format(command_children.name, command_children.resultMessage))
        module.exit_json(changed=True, msg='Enabled HA for HDFS service')
      else:
        module.exit_json(changed=False, msg='HDFS HA already enabled')
    # enable HA for YARN
    elif action_a == 'deploy_rm_ha':
      sn_host_a = module.params.get('sn_host', None)

      yarn = cluster.get_service('YARN')

      # if there is exactly one resourcemanager role, HA for YARN is not enabled yet, so enable it
      if len([0 for x in yarn.get_all_roles() if match('^YARN-RESOURCEMANAGER.*$', x.name) != None]) == 1:
        command = yarn.enable_rm_ha(sn_host_a, zk_service_name='ZOOKEEPER')
        children = command.wait().children
        for command_children in children:
          if command_children.success == False:
            module.fail_json(msg='Command {0} failed when enabling YARN HA with error {1}'.format(command_children.name, command_children.resultMessage))
        module.exit_json(changed=True, msg='Enabled HA for YARN service')
      else:
        module.exit_json(changed=False, msg='YARN HA already enabled')

    # deploy the base roles for a service, according to BASE_SERVICE_ROLE_MAP
    # after the deployment, run the init commands specified in SERVICE_INIT_COMMANDS
    elif action_a == 'deploy_base_roles':
      host_a = module.params.get('host', None)
      service_a = module.params.get('service', None)

      service_name = SERVICE_MAP[service_a]
      changed = False

      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
      else:
        service = cluster.get_service(service_name)

      service_roles = [x.name for x in service.get_all_roles()]

      # create each service from the map
      for (role_name, cloudera_name) in BASE_SERVICE_ROLE_MAP[service_a].items():
        # check if the role already exists; the script can't compare names directly
        # because after enabling HA on YARN, roles will have random strings in their names
        if len([0 for x in service_roles if match(role_name, x) != None]) == 0:
          service.create_role(role_name, cloudera_name, host_a)
          changed = True

          # init commands
          if role_name in SERVICE_INIT_COMMANDS.keys():
            for command_to_run in SERVICE_INIT_COMMANDS[role_name]:
              # different handling of commands specified by name and
              # ones specified by an instance method
              if ismethod(command_to_run):
                command = command_to_run(service)
              else:
                command = service.service_command_by_name(command_to_run)

              if command.wait().success == False:
                module.fail_json(msg='Running {0} failed with {1}'.format(command_to_run, command.resultMessage))

      if changed == True:
        module.exit_json(changed=True, msg='Created base roles for {0}'.format(service_name))
      else:
        module.exit_json(changed=False, msg='{0} base roles already exist'.format(service_name))

    # set config values for a given service/role
    elif action_a == 'set_config':
      entity_a = module.params.get('entity', None)
      service_a = module.params.get('service', None)
      role_a = module.params.get('role', None)
      name_a = module.params.get('name', None)
      value_a = module.params.get('value', None)

      if not service_a in SERVICE_MAP:
        module.fail_json(msg='Unknown service: {0}'.format(service_a))

      # since management is handled differently, it needs a different service
      if service_a == 'management':
        service = manager.get_service()
      elif service_a == 'cm':
        service = manager
      else:
        service = cluster.get_service(SERVICE_MAP[service_a])

      # role and service configs are handled differently
      if entity_a == 'service':
        prev_config = service.get_config()
        curr_config = service.update_config({name_a: value_a})
        if service_a == 'cm':
          prev_config = [prev_config]
          curr_config = [curr_config]
        module.exit_json(changed=(str(prev_config[0]) != str(curr_config[0])), msg='Config value for {0}: {1}'.format(name_a, curr_config[0][name_a]))

      elif entity_a == 'role':
        if not role_a in ROLE_MAP:
          module.fail_json(msg='Unknown role: {0}'.format(role_a))

        role = service.get_role_config_group(ROLE_MAP[role_a])
        prev_config = role.get_config()
        curr_config = role.update_config({name_a: value_a})
        module.exit_json(changed=(str(prev_config) != str(curr_config)), msg='Config value for {0}: {1}'.format(name_a, curr_config[name_a]))

      else:
        module.fail_json(msg='Invalid entity, must be one of service, role')

    # handle service state
    # currently this can only start/restart a service
    elif action_a == 'service':
      state_a = module.params.get('state', None)
      service_a = module.params.get('service', None)

      try:
        if service_a == 'cm':
          service = manager.get_service()
        else:
          service = cluster.get_service(SERVICE_MAP[service_a])
      except ApiException:
        module.fail_json(msg='Service does not exist')

      # when starting a service, we also deploy the client config for it
      if state_a == 'started':
        if service.serviceState == 'STARTED':
          module.exit_json(changed=False, msg='Service already running')
        method = service.start
        verb = "start"
      elif state_a == 'restarted':
        method = service.restart
        verb = "restart"

      try:
        command = service.deploy_client_config()
        if command.wait().success == False:
          module.fail_json(msg='Deploying client config failed with {0}'.format(command.resultMessage))
      # since there is no way to check if a service handles client config deployments
      # we try our best and pass the exception if it doesn't
      except (ApiException, AttributeError):
        pass

      method().wait()
      # we need to wait for cloudera checks to complete...
      # otherwise it will report as failing
      sleep(10)
      for i in range(24):
        sleep(10)
        service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
        if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
          break
      service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
      if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
        module.exit_json(changed=True, msg='Service {0} successful'.format(verb))
      else:
        module.fail_json(msg='Service {0} failed'.format(verb))

    # handle cluster
    # currently this can only restart the cluster
    elif action_a == 'cluster':
      state_a = module.params.get('state', None)

      if state_a == 'restarted':
        command = cluster.restart(redeploy_client_configuration=True)
        if command.wait().success == False:
          module.fail_json(msg='Cluster restart failed with {0}'.format(command.resultMessage))
        else:
          module.exit_json(changed=True, msg='Cluster restart successful')

    # Snapshot policy
    # only create is supported
    elif action_a == 'create_snapshot_policy':
      name_a = module.params.get('name', None)
      value_a = module.params.get('value', None)
      service_a = module.params.get('service', None)
      service = cluster.get_service(SERVICE_MAP[service_a])
      payload = loads(value_a)
      # check whether the policy already exists; an exception is expected when configuring it for the first time
      try:
        service.get_snapshot_policy(name_a)
        module.exit_json(changed=False, msg='Defined policy already exists')
      except ApiException:
        pass
      try:
        command = service.create_snapshot_policy(payload)
        module.exit_json(changed=True, msg='Snapshot policy was created.')
      except (ApiException, AttributeError):
        module.fail_json(msg='ERROR in creating snapshot policy.')
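The start/restart branch above polls the service in a fixed loop until it reports STARTED and GOOD. A small helper makes that pattern reusable; this is a minimal sketch assuming a cm_api service object, with the helper name and parameters chosen here for illustration rather than taken from the original module.

from time import sleep

def wait_for_service_health(fetch_service, attempts=24, delay=10):
    """Poll a CM service until it reports STARTED and GOOD, or give up.

    fetch_service is any zero-argument callable returning a fresh ApiService,
    e.g. lambda: cluster.get_service(SERVICE_MAP[service_a]).
    Returns True if the service became healthy within attempts * delay seconds.
    """
    for _ in range(attempts):
        service = fetch_service()
        if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
            return True
        sleep(delay)
    return False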
Ejemplo n.º 38
0
class ClouderaManager(object):
    """
    The complete orchestration of a cluster from start to finish assuming all the hosts are
    configured and Cloudera Manager is installed with all the required databases setup.

    Handles all the steps required to create a cluster. All the functions are built to be
    idempotent, so you should be able to resume from any failed step by re-running
    setup().
    """
    def __init__(self, module, config, trial=False, license_txt=None):
        self.api = ApiResource(config['cm']['host'],
                               username=config['cm']['username'],
                               password=config['cm']['password'])
        self.manager = self.api.get_cloudera_manager()
        self.config = config
        self.module = module
        self.trial = trial
        self.license_txt = license_txt
        self.cluster = None

    def enable_license(self):
        """
        Enable the requested license: either start a trial or register the provided
        full license.
        """
        try:
            _license = self.manager.get_license()
        except ApiException:
            print_json(type="LICENSE", msg="Enabling license")
            if self.trial:
                self.manager.begin_trial()
            else:
                if self.license_txt is not None:
                    self.manager.update_license(self.license_txt)
                else:
                    fail(
                        self.module,
                        'License should be provided or trial should be specified'
                    )

            try:
                _license = self.manager.get_license()
            except ApiException:
                fail(self.module, 'Failed enabling license')
        print_json(type="LICENSE",
                   msg="Owner: {}, UUID: {}".format(_license.owner,
                                                    _license.uuid))

    def create_cluster(self):
        """
        Create a cluster and add hosts to the cluster. A new cluster is only created
        if another one doesn't exist with the same name.
        """
        print_json(type="CLUSTER", msg="Creating cluster")
        cluster_config = self.config['cluster']
        try:
            self.cluster = self.api.get_cluster(cluster_config['name'])
        except ApiException:
            print_json(type="CLUSTER",
                       msg="Creating Cluster entity: {}".format(
                           cluster_config['name']))
            self.cluster = self.api.create_cluster(
                cluster_config['name'], cluster_config['version'],
                cluster_config['fullVersion'])

        cluster_hosts = [
            self.api.get_host(host.hostId).hostname
            for host in self.cluster.list_hosts()
        ]
        hosts = []
        for host in cluster_config['hosts']:
            if host not in cluster_hosts:
                hosts.append(host)
        self.cluster.add_hosts(hosts)

    def activate_parcels(self):
        print_json(type="PARCELS", msg="Setting up parcels")
        for parcel_cfg in self.config['parcels']:
            parcel = Parcels(self.module, self.manager, self.cluster,
                             parcel_cfg.get('version'), parcel_cfg.get('repo'),
                             parcel_cfg.get('product', 'CDH'))
            parcel.download()
            parcel.distribute()
            parcel.activate()

    @retry(attempts=20, delay=5)
    def wait_inspect_hosts(self, cmd):
        """
        Inspect all the hosts. Basically wait till the check completes on all hosts.

        :param cmd: A command instance used for tracking the status of the command
        """
        print_json(type="HOSTS", msg="Inspecting hosts")
        cmd = cmd.fetch()
        if cmd.success is None:
            raise ApiException("Waiting on command {} to finish".format(cmd))
        elif not cmd.success:
            if (cmd.resultMessage is not None
                    and 'is not currently available for execution'
                    in cmd.resultMessage):
                raise ApiException('Retry Command')
            fail(self.module, 'Host inspection failed')
        print_json(type="HOSTS",
                   msg="Host inspection completed: {}".format(
                       cmd.resultMessage))

    def deploy_mgmt_services(self):
        """
        Configure, deploy and start all the Cloudera Management Services.
        """
        print_json(type="MGMT", msg="Deploying Management Services")
        try:
            mgmt = self.manager.get_service()
            if mgmt.serviceState == 'STARTED':
                return
        except ApiException:
            print_json(type="MGMT",
                       msg="Management Services don't exist. Creating.")
            mgmt = self.manager.create_mgmt_service(ApiServiceSetupInfo())

        for role in self.config['services']['MGMT']['roles']:
            if not len(mgmt.get_roles_by_type(role['group'])) > 0:
                print_json(type="MGMT",
                           msg="Creating role for {}".format(role['group']))
                mgmt.create_role('{}-1'.format(role['group']), role['group'],
                                 role['hosts'][0])

        for role in self.config['services']['MGMT']['roles']:
            role_group = mgmt.get_role_config_group('mgmt-{}-BASE'.format(
                role['group']))
            role_group.update_config(role.get('config', {}))

        mgmt.start().wait()
        if self.manager.get_service().serviceState == 'STARTED':
            print_json(type="MGMT", msg="Management Services started")
        else:
            fail(
                self.module,
                "[MGMT] Cloudera Management services didn't start up properly")

    def service_orchestrate(self, services):
        """
        Create, pre-configure provided list of services
        Stop/Start those services
        Perform and post service startup actions

        :param services: List of Services to perform service specific actions
        """
        service_classes = []

        # Create and pre-configure provided services
        for service in services:
            service_config = self.config['services'].get(service.upper())
            if service_config:
                svc = getattr(sys.modules[__name__], service)(self.cluster,
                                                              service_config)
                if not svc.started:
                    svc.deploy()
                    svc.pre_start()
                service_classes.append(svc)

        print_json(type="CLUSTER",
                   msg="Starting services: {} on Cluster".format(services))

        # Deploy all the client configs, since some services depend on other services
        # and it is essential that the client configs are in place
        self.cluster.deploy_client_config()

        # Start each service and run the post_start actions for each service
        for svc in service_classes:
            # Only go thru the steps if the service is not yet started. This helps with
            # re-running the script after fixing errors
            if not svc.started:
                svc.start()
                svc.post_start()

    def setup(self):
        # TODO(rnirmal): Cloudera Manager SSL?

        # Enable a full license or start a trial
        self.enable_license()

        # Create the cluster entity and associate hosts
        self.create_cluster()

        # Download and activate the parcels
        self.activate_parcels()

        # Inspect all the hosts
        self.wait_inspect_hosts(self.manager.inspect_hosts())

        # Create Management services
        self.deploy_mgmt_services()

        # Configure and Start base services
        self.service_orchestrate(BASE_SERVICES)

        # Configure and Start remaining services
        self.service_orchestrate(ADDITIONAL_SERVICES)
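A typical entry point for the class above loads the cluster definition, builds the object and runs setup(). The sketch below is an assumption rather than part of the original: the config file path, its YAML layout and the Ansible module wiring are placeholders, and only the keys actually read by ClouderaManager (cm, cluster, parcels, services) need to be present.

import yaml

def run_setup(module, config_path, trial=True, license_txt=None):
    # Load the cluster definition consumed by ClouderaManager
    # (expected keys: cm.host/username/password, cluster, parcels, services).
    with open(config_path) as handle:
        config = yaml.safe_load(handle)
    ClouderaManager(module, config, trial=trial, license_txt=license_txt).setup()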
Ejemplo n.º 39
0
class Deploy:
    def __init__(self,
                 cm_port='7180',
                 cm_user='******',
                 cm_passwd='admin',
                 cluster_name='cluster1'):

        self.cluster_name = cluster_name
        self.cdh_version = "CDH5"

        self.cfg = ParseConfig()
        self.host_list = self.cfg.get_hosts()

        self._get_host_allocate()
        self.cm_host = self.host_list[0]

        self.api = ApiResource(self.cm_host,
                               cm_port,
                               cm_user,
                               cm_passwd,
                               version=7)
        self.cm = self.api.get_cloudera_manager()

        try:
            self.cluster = self.api.get_cluster(self.cluster_name)
        except:
            try:
                self.cluster = self.api.create_cluster(self.cluster_name,
                                                       self.cdh_version)
            except:
                err('Cannot connect to cloudera manager on %s' % self.cm_host)

        # add all our hosts to the cluster
        try:
            self.cluster.add_hosts(self.host_list)
            info('Add hosts successfully')
        except Exception as e:
            if e.code == 400:
                info('Already Added hosts')
            elif e.code == 404:
                err(e.message)

    def _auto_allocate(self, hosts):
        # enable mgmt node if node count is larger than mgmt_th
        mgmt_th = 6

        if type(hosts) != list: err('hosts parameter should be a list')
        host_num = len(hosts)
        # node<=3, ZK=1 ,node>3, ZK=3
        zk_num = 1 if host_num <= 3 else 3

        # with mgmt node
        if host_num >= mgmt_th:
            self.ap_host = self.es_host = self.ho_host = self.sm_host = self.nn_host = self.hm_host = self.jt_host = hosts[
                0]
            self.dn_hosts = self.rs_hosts = self.tt_hosts = hosts[1:]
            self.snn_host = hosts[1]
            self.hms_host = hosts[2]
            self.hs2_host = hosts[3]
        # without mgmt node
        else:
            if host_num == 1:
                self.ap_host = self.es_host = self.ho_host = self.sm_host = self.jt_host = \
                self.nn_host = self.hm_host = self.snn_host = self.hms_host = self.hs2_host = hosts[0]
            elif host_num > 1:
                # nn, snn not on same node
                tmp_hosts = hosts[:]
                self.nn_host = choice(tmp_hosts)
                tmp_hosts.remove(self.nn_host)
                self.snn_host = choice(tmp_hosts)
                self.hm_host = choice(tmp_hosts)
                self.jt_host = choice(hosts)
                self.hms_host = choice(hosts)
                self.hs2_host = choice(hosts)
                # cm
                self.ap_host = choice(hosts)
                self.es_host = choice(hosts)
                self.ho_host = choice(hosts)
                self.sm_host = choice(hosts)

            self.dn_hosts = self.rs_hosts = self.tt_hosts = hosts

        self.zk_hosts = hosts[-zk_num:]

    def _get_host_allocate(self):
        roles = self.cfg.get_roles()
        # auto set if no role config found
        if not roles:
            self._auto_allocate(self.host_list)
            return

        valid_roles = [
            'DN', 'RS', 'ZK', 'HM', 'NN', 'SNN', 'AP', 'ES', 'SM', 'HO', 'TT',
            'JT', 'HMS', 'HS2'
        ]
        role_host = defaultdict(list)

        for item in roles:
            for role in item[1]:
                role = role.strip()
                if role not in valid_roles: err('Incorrect role config')
                role_host[role].append(item[0])

        # cdh
        self.nn_host = role_host['NN'][0]
        self.snn_host = role_host['SNN'][0]
        self.hm_host = role_host['HM'][0]
        self.jt_host = role_host['JT'][0]
        self.hms_host = role_host['HMS'][0]
        self.hs2_host = role_host['HS2'][0]
        self.tt_hosts = role_host['TT']
        self.zk_hosts = role_host['ZK']
        self.dn_hosts = role_host['DN']
        self.rs_hosts = role_host['RS']
        # cm
        self.ap_host = role_host['AP'][0]
        self.es_host = role_host['ES'][0]
        self.ho_host = role_host['HO'][0]
        self.sm_host = role_host['SM'][0]

    def setup_cms(self):
        try:
            self.cm.delete_mgmt_service()
        except:
            pass

        # create the management service
        try:
            mgmt = self.cm.create_mgmt_service(ApiServiceSetupInfo())
            mgmt.create_role('AlertPublisher', "ALERTPUBLISHER", self.ap_host)
            mgmt.create_role('EventServer', "EVENTSERVER", self.es_host)
            mgmt.create_role('HostMonitor', "HOSTMONITOR", self.hm_host)
            mgmt.create_role('ServiceMonitor', "SERVICEMONITOR", self.sm_host)
            ok('Cloudera management service created successfully.')
        except ApiException:
            info('Cloudera management service had already been created.')

    def setup_parcel(self):
        parcels_list = []
        i = 1
        for p in self.cluster.get_all_parcels():
            if p.stage == 'AVAILABLE_REMOTELY': continue
            elif p.stage == 'ACTIVATED':
                info('Parcel [%s] has already been activated' % p.version)
                return
            else:
                print '\t' + str(i) + ': ' + p.product + ' ' + p.version
                i += 1
                parcels_list.append(p)

        if len(parcels_list) == 0:
            err('No downloaded ' + self.cdh_version + ' parcel found!')
        elif len(parcels_list) > 1:
            index = raw_input('Input parcel number:')
            if not index.isdigit():
                err('Error index, must be a number')
            cdh_parcel = parcels_list[int(index) - 1]
        else:
            cdh_parcel = parcels_list[0]

    #  # download the parcel
    #  print "Starting parcel download. This might take a while."
    #  cmd = cdh_parcel.start_download()
    #  if cmd.success != True:
    #      print "Parcel download failed!"
    #      exit(0)

    #  # make sure the download finishes
    #  while cdh_parcel.stage != 'DOWNLOADED':
    #      sleep(5)
    #      cdh_parcel = self.cluster.get_parcel(cdh_parcel.product, cdh_parcel.version)

    #  print cdh_parcel.product + ' ' + cdh_parcel.version + " downloaded"

        # distribute the parcel
        info('Starting parcel distribution. This might take a while.')
        cmd = cdh_parcel.start_distribution()
        i = 0
        while cmd.success == None:
            i += 1
            sleep(5)
            cmd = cmd.fetch()
            s = '.' * i
            print '\r%s' % s,
            sys.stdout.flush()
        if cmd.success != True:
            err('Parcel distribution failed!')

        # make sure the distribution finishes
        while cdh_parcel.stage != "DISTRIBUTED":
            sleep(5)
            cdh_parcel = self.cluster.get_parcel(cdh_parcel.product,
                                                 cdh_parcel.version)

        ok(cdh_parcel.product + ' ' + cdh_parcel.version + ' distributed')

        # activate the parcel
        cmd = cdh_parcel.activate()
        if cmd.success != True:
            err('Parcel activation failed!')

        # make sure the activation finishes
        while cdh_parcel.stage != "ACTIVATED":
            sleep(5)
            cdh_parcel = self.cluster.get_parcel(cdh_parcel.product,
                                                 cdh_parcel.version)

        ok(cdh_parcel.product + ' ' + cdh_parcel.version + ' activated')

    def _create_service(self, sdata):
        try:
            self.cluster.get_service(sdata['sname'])
            info('Service %s had already been configured' % sdata['sname'])
        except ApiException:
            service = self.cluster.create_service(sdata['sname'],
                                                  sdata['stype'])
            ok('Service %s had been created successfully' % sdata['sname'])
            for role in sdata['roles']:
                if role.has_key('rhost'):
                    service.create_role(role['rname'], role['rtype'],
                                        role['rhost'])
                elif role.has_key('rhosts'):
                    rid = 0
                    for host in role['rhosts']:
                        rid += 1
                        service.create_role(role['rname'] + '-' + str(rid),
                                            role['rtype'], host)

    def setup_cdh(self):
        service_data = [{
            'sname':
            'hdfs',
            'stype':
            'HDFS',
            'roles': [{
                'rname': 'hdfs-namenode',
                'rtype': 'NAMENODE',
                'rhost': self.nn_host
            }, {
                'rname': 'hdfs-secondarynamenode',
                'rtype': 'SECONDARYNAMENODE',
                'rhost': self.snn_host
            }, {
                'rname': 'hdfs-datanode',
                'rtype': 'DATANODE',
                'rhosts': self.dn_hosts
            }]
        }, {
            'sname':
            'zookeeper',
            'stype':
            'ZOOKEEPER',
            'roles': [{
                'rname': 'zookeeper',
                'rtype': 'SERVER',
                'rhosts': self.zk_hosts
            }]
        }, {
            'sname':
            'hbase',
            'stype':
            'HBASE',
            'roles': [{
                'rname': 'hbase-master',
                'rtype': 'MASTER',
                'rhost': self.hm_host
            }, {
                'rname': 'hdfs-regionserver',
                'rtype': 'REGIONSERVER',
                'rhosts': self.rs_hosts
            }]
        }, {
            'sname':
            'hive',
            'stype':
            'HIVE',
            'roles': [{
                'rname': 'hive-metastore',
                'rtype': 'HIVEMETASTORE',
                'rhost': self.hms_host
            }, {
                'rname': 'hive-server2',
                'rtype': 'HIVESERVER2',
                'rhost': self.hs2_host
            }, {
                'rname': 'hive-gateway',
                'rtype': 'GATEWAY',
                'rhosts': self.dn_hosts
            }]
        }, {
            'sname':
            'mapreduce',
            'stype':
            'MAPREDUCE',
            'roles': [{
                'rname': 'mapreduce-jobtracker',
                'rtype': 'JOBTRACKER',
                'rhost': self.jt_host
            }, {
                'rname': 'mapreduce-tasktracker',
                'rtype': 'TASKTRACKER',
                'rhosts': self.tt_hosts
            }]
        }]

        for sdata in service_data:
            self._create_service(sdata)

        # additional config for hive
        try:
            hive_service = self.cluster.get_service('hive')
            hive_metastore_host = self.cm_host  # should be same as cm's host, FQDN
            hive_metastore_name = 'hive'
            hive_metastore_password = '******'
            hive_metastore_database_port = '7432'
            hive_metastore_database_type = 'postgresql'
            hive_config = { 'hive_metastore_database_host' : hive_metastore_host, \
                            'hive_metastore_database_name' : hive_metastore_name, \
                            'hive_metastore_database_password' : hive_metastore_password, \
                            'hive_metastore_database_port' : hive_metastore_database_port, \
                            'hive_metastore_database_type' : hive_metastore_database_type }
            hive_service.update_config(hive_config)
            ok('Additional hive configs had been updated')
        except ApiException as e:
            err(e.message)

        # use auto configure for *-site.xml configs
        try:
            self.cluster.auto_configure()
        except ApiException as e:
            err(e.message)

    def start_cms(self):
        # start the management service
        info('Starting cloudera management service...')
        cms = self.cm.get_service()
        cms.start().wait()
        ok('Cloudera management service started successfully')

    def start_cdh(self):
        info('Executing first run command. This might take a while.')
        cmd = self.cluster.first_run()

        while cmd.success == None:
            cmd = cmd.fetch()
            sleep(1)

        if cmd.success != True:
            err('The first run command failed: ' + cmd.resultMessage)

        ok('First run successfully executed. Your cluster has been set up!')
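The Deploy class above is driven method by method. A plausible driver, sketched here under the assumption that the script is run standalone and that the ok/info/err helpers are defined in the same module, simply chains the steps in order:

if __name__ == '__main__':
    deploy = Deploy()
    deploy.setup_cms()
    deploy.setup_parcel()
    deploy.setup_cdh()
    deploy.start_cms()
    deploy.start_cdh()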
Ejemplo n.º 40
0
def main():
   API = ApiResource(CM_HOST, version=5, username=ADMIN_USER, password=ADMIN_PASS)
   MANAGER = API.get_cloudera_manager()
   MANAGER.update_config(CM_CONFIG)
   print "Connected to CM host on " + CM_HOST + " and updated CM configuration"

   CLUSTER = init_cluster(API, CLUSTER_NAME, CDH_VERSION, CLUSTER_HOSTS, CM_HOST)
   print "Initialized cluster " + CLUSTER_NAME + " which uses CDH version " + CDH_VERSION

   deploy_management(MANAGER, MGMT_SERVICENAME, MGMT_SERVICE_CONFIG, MGMT_ROLE_CONFIG, AMON_ROLENAME, AMON_ROLE_CONFIG, APUB_ROLENAME, APUB_ROLE_CONFIG, ESERV_ROLENAME, ESERV_ROLE_CONFIG, HMON_ROLENAME, HMON_ROLE_CONFIG, SMON_ROLENAME, SMON_ROLE_CONFIG, NAV_ROLENAME, NAV_ROLE_CONFIG, NAVMS_ROLENAME, NAVMS_ROLE_CONFIG, RMAN_ROLENAME, RMAN_ROLE_CONFIG)
   print "Deployed CM management service " + MGMT_SERVICENAME + " to run on " + CM_HOST
   
   deploy_parcels(CLUSTER, PARCELS)
   print "Downloaded and distributed parcels: "
   PRETTY_PRINT.pprint(PARCELS)

   zookeeper_service = deploy_zookeeper(CLUSTER, ZOOKEEPER_SERVICE_NAME, ZOOKEEPER_HOSTS, ZOOKEEPER_SERVICE_CONFIG, ZOOKEEPER_ROLE_CONFIG)
   print "Deployed ZooKeeper " + ZOOKEEPER_SERVICE_NAME + " to run on: "
   PRETTY_PRINT.pprint(ZOOKEEPER_HOSTS)
   
   hdfs_service = deploy_hdfs(CLUSTER, HDFS_SERVICE_NAME, HDFS_SERVICE_CONFIG, HDFS_NAMENODE_SERVICE_NAME, HDFS_NAMENODE_HOST, HDFS_NAMENODE_CONFIG, HDFS_SECONDARY_NAMENODE_HOST, HDFS_SECONDARY_NAMENODE_CONFIG, HDFS_DATANODE_HOSTS, HDFS_DATANODE_CONFIG, HDFS_GATEWAY_HOSTS, HDFS_GATEWAY_CONFIG)
   print "Deployed HDFS service " + HDFS_SERVICE_NAME + " using NameNode on " + HDFS_NAMENODE_HOST + ", SecondaryNameNode on " + HDFS_SECONDARY_NAMENODE_HOST + ", and DataNodes running on: "
   PRETTY_PRINT.pprint(HDFS_DATANODE_HOSTS)
   init_hdfs(hdfs_service, HDFS_SERVICE_NAME, CMD_TIMEOUT)
   print "Initialized HDFS service"

   # mapred and yarn are mutually exclusive; only deploy one of them
   #mapred_service = deploy_mapreduce(CLUSTER, MAPRED_SERVICE_NAME, MAPRED_SERVICE_CONFIG, MAPRED_JT_HOST, MAPRED_JT_CONFIG, MAPRED_TT_HOSTS, MAPRED_TT_CONFIG, MAPRED_GW_HOSTS, MAPRED_GW_CONFIG)
   print "Deployed MapReduce service " + MAPRED_SERVICE_NAME + " using JobTracker on " + MAPRED_JT_HOST + " and TaskTrackers running on "
   PRETTY_PRINT.pprint(MAPRED_TT_HOSTS)
   
   yarn_service = deploy_yarn(CLUSTER, YARN_SERVICE_NAME, YARN_SERVICE_CONFIG, YARN_RM_HOST, YARN_RM_CONFIG, YARN_JHS_HOST, YARN_JHS_CONFIG, YARN_NM_HOSTS, YARN_NM_CONFIG, YARN_GW_HOSTS, YARN_GW_CONFIG)
   print "Deployed YARN service " + YARN_SERVICE_NAME + " using ResourceManager on " + YARN_RM_HOST + ", JobHistoryServer on " + YARN_JHS_HOST + ", and NodeManagers on "
   PRETTY_PRINT.pprint(YARN_NM_HOSTS)
   
   spark_service = deploy_spark(CLUSTER, SPARK_SERVICE_NAME, SPARK_SERVICE_CONFIG, SPARK_MASTER_HOST, SPARK_MASTER_CONFIG, SPARK_WORKER_HOSTS, SPARK_WORKER_CONFIG, SPARK_GW_HOSTS, SPARK_GW_CONFIG)
   print "Deployed SPARK service " + SPARK_SERVICE_NAME + " using SparkMaster on " + SPARK_MASTER_HOST + " and SparkWorkers on "
   PRETTY_PRINT.pprint(SPARK_WORKER_HOSTS)
   
   deploy_hbase(CLUSTER, HBASE_SERVICE_NAME, HBASE_SERVICE_CONFIG, HBASE_HM_HOST, HBASE_HM_CONFIG, HBASE_RS_HOSTS, HBASE_RS_CONFIG, HBASE_THRIFTSERVER_SERVICE_NAME, HBASE_THRIFTSERVER_HOST, HBASE_THRIFTSERVER_CONFIG, HBASE_GW_HOSTS, HBASE_GW_CONFIG)
   print "Deployed HBase service " + HBASE_SERVICE_NAME + " using HMaster on " + HBASE_HM_HOST + " and RegionServers on "
   PRETTY_PRINT.pprint(HBASE_RS_HOSTS)
   
   hive_service = deploy_hive(CLUSTER, HIVE_SERVICE_NAME, HIVE_SERVICE_CONFIG, HIVE_HMS_HOST, HIVE_HMS_CONFIG, HIVE_HS2_HOST, HIVE_HS2_CONFIG, HIVE_WHC_HOST, HIVE_WHC_CONFIG, HIVE_GW_HOSTS, HIVE_GW_CONFIG)
   print "Depoyed Hive service " + HIVE_SERVICE_NAME + " using HiveMetastoreServer on " + HIVE_HMS_HOST + " and HiveServer2 on " + HIVE_HS2_HOST
   init_hive(hive_service)
   print "Initialized Hive service"
   
   impala_service = deploy_impala(CLUSTER, IMPALA_SERVICE_NAME, IMPALA_SERVICE_CONFIG, IMPALA_SS_HOST, IMPALA_SS_CONFIG, IMPALA_CS_HOST, IMPALA_CS_CONFIG, IMPALA_ID_HOSTS, IMPALA_ID_CONFIG)
   print "Deployed Impala service " + IMPALA_SERVICE_NAME + " using StateStore on " + IMPALA_SS_HOST + ", CatalogServer on " + IMPALA_CS_HOST + ", and ImpalaDaemons on "
   PRETTY_PRINT.pprint(IMPALA_ID_HOSTS)
   
   #Need to start the cluster now as subsequent services need the cluster to be running
   #TODO can we just start ZK, and maybe HDFS, instead of everything? It's just needed for the search service
   print "About to restart cluster"
   CLUSTER.stop().wait()
   CLUSTER.start().wait()
   print "Done restarting cluster"

   search_service = deploy_search(CLUSTER, SEARCH_SERVICE_NAME, SEARCH_SERVICE_CONFIG, SEARCH_SOLR_HOST, SEARCH_SOLR_CONFIG, SEARCH_GW_HOSTS, SEARCH_GW_CONFIG)
   print "Deployed Search service " + SEARCH_SERVICE_NAME + " using SOLRHost " + SEARCH_SOLR_HOST
   
   flume_service = deploy_flume(CLUSTER, FLUME_SERVICE_NAME, FLUME_SERVICE_CONFIG, FLUME_AGENT_HOSTS, FLUME_AGENT_CONFIG)
   print "Deployed Flume service " + FLUME_SERVICE_NAME + " using FlumeAgents on "
   PRETTY_PRINT.pprint(FLUME_AGENT_HOSTS)
   
   oozie_service = deploy_oozie(CLUSTER, OOZIE_SERVICE_NAME, OOZIE_SERVICE_CONFIG, OOZIE_SERVER_HOST, OOZIE_SERVER_CONFIG)
   print "Deployed Oozie service " + OOZIE_SERVICE_NAME + " using OozieServer on " + OOZIE_SERVER_HOST
   
   sqoop_service = deploy_sqoop(CLUSTER, SQOOP_SERVICE_NAME, SQOOP_SERVICE_CONFIG, SQOOP_SERVER_HOST, SQOOP_SERVER_CONFIG)
   print "Deployed Sqoop service " + SQOOP_SERVICE_NAME + " using SqoopServer on " + SQOOP_SERVER_HOST
   
   hue_service = deploy_hue(CLUSTER, HUE_SERVICE_NAME, HUE_SERVICE_CONFIG, HUE_SERVER_HOST, HUE_SERVER_CONFIG, HUE_KTR_HOST, HUE_KTR_CONFIG)
   print "Deployed HUE service " + HUE_SERVICE_NAME + " using HueServer on " + HUE_SERVER_HOST
   
   #deploy_accumulo(CLUSTER, ACCUMULO_SERVICE_NAME, ACCUMULO_SERVICE_CONFIG, ACCUMULO_MASTER_HOSTS, ACCUMULO_MASTER_CONFIG, ACCUMULO_TRACER_HOSTS, ACCUMULO_TRACER_CONFIG, ACCUMULO_TSERVER_HOSTS, ACCUMULO_TSERVER_CONFIG, ACCUMULO_LOGGER_HOSTS, ACCUMULO_LOGGER_CONFIG, ACCUMULO_MONITOR_HOST, ACCUMULO_MONITOR_CONFIG, ACCUMULO_GC_HOST, ACCUMULO_GC_CONFIG, ACCUMULO_GATEWAY_HOSTS, ACCUMULO_GATEWAY_CONFIG)
   
   print "About to restart cluster."
   CLUSTER.stop().wait()
   CLUSTER.start().wait()
   print "Done restarting cluster."
   
   post_startup(CLUSTER, hdfs_service, oozie_service)

   print "Finished deploying Cloudera cluster. Go to http://" + CM_HOST + ":7180 to administer the cluster."
   print "If the Oozie service (and therefore the HUE service as well, which depends on it) did not start properly, go to the Oozie service, stop it, click on the Actions button and choose 'Create Database', then start it."
   print "If there are any other services not running, restart them now."
Ejemplo n.º 41
0
class ClouderaManagerDeployment(object):
    def __init__(self, cm_server_address, cm_server_port=DEFAULT_CM_PORT,
                 username=DEFAULT_CM_USERNAME, password=DEFAULT_CM_PASSWORD):
        self.cm_server_address = cm_server_address
        self.cm_server_port = cm_server_port
        self.username = username
        self.password = password

    def setup_api_resources(self):
        self.api = ApiResource(server_host=self.cm_server_address, server_port=self.cm_server_port,
                               username=self.username, password=self.password,
                               version=self._get_api_version())

        self.cm = self.api.get_cloudera_manager()
        self.cluster = self.api.get_cluster('Cluster 1 (clusterdock)')

    def prep_for_start(self):
        pass

    def validate_services_started(self, timeout_min=10, healthy_time_threshold_sec=30):
        start_validating_time = time()
        healthy_time = None

        logger.info('Beginning service health validation...')
        while healthy_time is None or (time() - healthy_time < healthy_time_threshold_sec):
            if (time() - start_validating_time < timeout_min * 60):
                all_services = list(self.cluster.get_all_services()) + [self.cm.get_service()]
                at_fault_services = list()
                for service in all_services:
                    if (service.serviceState != "NA" and service.serviceState != "STARTED"):
                        at_fault_services.append([service.name, "NOT STARTED"])
                    elif (service.serviceState != "NA" and service.healthSummary != "GOOD"):
                        checks = list()
                        for check in service.healthChecks:
                            if (check["summary"] not in ("GOOD", "DISABLED")):
                                checks.append(check["name"])
                        at_fault_services.append([service.name,
                                                 "Failed health checks: {0}".format(checks)])

                if not healthy_time or at_fault_services:
                    healthy_time = time() if not at_fault_services else None
                sleep(3)
            else:
                raise Exception(("Timed out after waiting {0} minutes for services to start "
                                "(at fault: {1}).").format(timeout_min, at_fault_services))
        logger.info("Validated that all services started (time: %.2f s).",
                    time() - start_validating_time)

    def add_hosts_to_cluster(self, secondary_node_fqdn, all_fqdns):
        cm_utils.add_hosts_to_cluster(api=self.api, cluster=self.cluster,
                                      secondary_node_fqdn=secondary_node_fqdn,
                                      all_fqdns=all_fqdns)

    def update_hive_metastore_namenodes(self):
        for service in self.cluster.get_all_services():
            if service.type == 'HIVE':
                logger.info('Updating NameNode references in Hive metastore...')
                update_metastore_namenodes_cmd = service.update_metastore_namenodes().wait()
                if not update_metastore_namenodes_cmd.success:
                    logger.warning(("Failed to update NameNode references in Hive metastore "
                                    "(command returned %s)."), update_metastore_namenodes_cmd)

    def update_database_configs(self):
        cm_utils.update_database_configs(api=self.api, cluster=self.cluster)

    def _get_api_version(self):
        api_version_response = requests.get(
            "http://{0}:{1}/api/version".format(self.cm_server_address,
                                                self.cm_server_port),
            auth=(self.username, self.password))
        api_version_response.raise_for_status()
        api_version = api_version_response.content
        if 'v' not in api_version:
            raise Exception("/api/version returned unexpected result (%s).", api_version)
        else:
            logger.info("Detected CM API %s.", api_version)
            return api_version.strip('v')
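As a usage sketch of the class above (the host name is a placeholder), the deployment is typically driven in three calls: build the object, resolve the API version and handles, then block until every service reports healthy.

deployment = ClouderaManagerDeployment('cm-server.example.com')
deployment.setup_api_resources()
deployment.prep_for_start()
deployment.validate_services_started(timeout_min=10)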
def main():
    api = ApiResource(cm_host,
                      cm_port,
                      cm_username,
                      cm_password,
                      version=api_num)
    cm = ClouderaManager(api)
    #cm.host_install(host_username, host_list, password=host_password, cm_repo_url=cm_repo_url)
    MANAGER = api.get_cloudera_manager()
    #MANAGER.update_config)
    print "Connected to CM host on " + cm_host + " and updated CM configuration"

    CLUSTER = init_cluster(api, cluster_name, cdh_version, host_list,
                           host_list)

    deploy_management(MANAGER, MGMT_SERVICENAME, MGMT_SERVICE_CONFIG,
                      MGMT_ROLE_CONFIG, AMON_ROLENAME, AMON_ROLE_CONFIG,
                      APUB_ROLENAME, APUB_ROLE_CONFIG, ESERV_ROLENAME,
                      ESERV_ROLE_CONFIG, HMON_ROLENAME, HMON_ROLE_CONFIG,
                      SMON_ROLENAME, SMON_ROLE_CONFIG, NAV_ROLENAME,
                      NAV_ROLE_CONFIG, NAVMS_ROLENAME, NAVMS_ROLE_CONFIG,
                      RMAN_ROLENAME, RMAN_ROLE_CONFIG)

    print "Deployed CM management service " + MGMT_SERVICENAME + " to run on " + cm_host + "now service is stop!"

    deploy_parcels(CLUSTER, PARCELS)
    print "Downloaded and distributed parcels: "
    PRETTY_PRINT.pprint(PARCELS)

    zookeeper_service = deploy_zookeeper(CLUSTER, ZOOKEEPER_SERVICE_NAME,
                                         ZOOKEEPER_HOSTS,
                                         ZOOKEEPER_SERVICE_CONFIG,
                                         ZOOKEEPER_ROLE_CONFIG)
    print "Deployed ZooKeeper " + ZOOKEEPER_SERVICE_NAME + " to run on: "
    PRETTY_PRINT.pprint(ZOOKEEPER_HOSTS)

    hdfs_service = deploy_hdfs(CLUSTER, HDFS_SERVICE_NAME, HDFS_SERVICE_CONFIG,
                               HDFS_NAMENODE_SERVICE_NAME, HDFS_NAMENODE_HOST,
                               HDFS_NAMENODE_CONFIG,
                               HDFS_SECONDARY_NAMENODE_HOST,
                               HDFS_SECONDARY_NAMENODE_CONFIG,
                               HDFS_DATANODE_HOSTS, HDFS_DATANODE_CONFIG,
                               HDFS_GATEWAY_HOSTS, HDFS_GATEWAY_CONFIG)
    print "Deployed HDFS service " + HDFS_SERVICE_NAME + " using NameNode on " + HDFS_NAMENODE_HOST + ", SecondaryNameNode on " + HDFS_SECONDARY_NAMENODE_HOST + ", and DataNodes running on: "
    PRETTY_PRINT.pprint(HDFS_DATANODE_HOSTS)
    init_hdfs(hdfs_service, HDFS_SERVICE_NAME, 600)
    # Test: moved this call here; it originally lived in the post_startup function
    #hdfs_service.create_hdfs_tmp()
    print "Initialized HDFS service"

    yarn_service = deploy_yarn(CLUSTER, YARN_SERVICE_NAME, YARN_SERVICE_CONFIG,
                               YARN_RM_HOST, YARN_RM_CONFIG, YARN_JHS_HOST,
                               YARN_JHS_CONFIG, YARN_NM_HOSTS, YARN_NM_CONFIG,
                               YARN_GW_HOSTS, YARN_GW_CONFIG)
    print "Deployed YARN service " + YARN_SERVICE_NAME + " using ResourceManager on " + YARN_RM_HOST + ", JobHistoryServer on " + YARN_JHS_HOST + ", and NodeManagers on "
    PRETTY_PRINT.pprint(YARN_NM_HOSTS)

    #deploy_hbase(CLUSTER, HBASE_SERVICE_NAME, HBASE_SERVICE_CONFIG, HBASE_HM_HOST, HBASE_HM_CONFIG, HBASE_RS_HOSTS, HBASE_RS_CONFIG, HBASE_THRIFTSERVER_SERVICE_NAME, HBASE_THRIFTSERVER_HOST, HBASE_THRIFTSERVER_CONFIG, HBASE_GW_HOSTS, HBASE_GW_CONFIG)
    deploy_hbase(CLUSTER, HBASE_SERVICE_NAME, HBASE_SERVICE_CONFIG,
                 HBASE_HM_HOST, HBASE_HM_CONFIG, HBASE_RS_HOSTS,
                 HBASE_RS_CONFIG, HBASE_GW_HOSTS, HBASE_GW_CONFIG)
    print "Deployed HBase service " + HBASE_SERVICE_NAME + " using HMaster on " + HBASE_HM_HOST + " and RegionServers on "
    PRETTY_PRINT.pprint(HBASE_RS_HOSTS)

    hive_service = deploy_hive(CLUSTER, HIVE_SERVICE_NAME, HIVE_SERVICE_CONFIG,
                               HIVE_HMS_HOST, HIVE_HMS_CONFIG, HIVE_HS2_HOST,
                               HIVE_HS2_CONFIG, HIVE_GW_HOSTS, HIVE_GW_CONFIG)
    print "Depoyed Hive service " + HIVE_SERVICE_NAME + " using HiveMetastoreServer on " + HIVE_HMS_HOST + " and HiveServer2 on " + HIVE_HS2_HOST
    init_hive(hive_service)
    print "Initialized Hive service"

    impala_service = deploy_impala(CLUSTER, IMPALA_SERVICE_NAME,
                                   IMPALA_SERVICE_CONFIG, IMPALA_SS_HOST,
                                   IMPALA_SS_CONFIG, IMPALA_CS_HOST,
                                   IMPALA_CS_CONFIG, IMPALA_ID_HOSTS,
                                   IMPALA_ID_CONFIG)
    print "Deployed Impala service " + IMPALA_SERVICE_NAME + " using StateStore on " + IMPALA_SS_HOST + ", CatalogServer on " + IMPALA_CS_HOST + ", and ImpalaDaemons on "
    PRETTY_PRINT.pprint(IMPALA_ID_HOSTS)

    #CLUSTER.stop().wait()
    CLUSTER.start().wait()
    #post_startup(CLUSTER, hdfs_service, oozie_service)

    oozie_service = deploy_oozie(CLUSTER, OOZIE_SERVICE_NAME,
                                 OOZIE_SERVICE_CONFIG, OOZIE_SERVER_HOST,
                                 OOZIE_SERVER_CONFIG)
    print "Deployed Oozie service " + OOZIE_SERVICE_NAME + " using OozieServer on " + OOZIE_SERVER_HOST

    hue_service = deploy_hue(CLUSTER, HUE_SERVICE_NAME, HUE_SERVICE_CONFIG,
                             HUE_SERVER_HOST, HUE_SERVER_CONFIG, HUE_KTR_HOST,
                             HUE_KTR_CONFIG)
    print "Deployed HUE service " + HUE_SERVICE_NAME + " using HueServer on " + HUE_SERVER_HOST

    #post_startup(CLUSTER, hdfs_service)
    print "About to restart cluster."
    CLUSTER.stop().wait()
    CLUSTER.start().wait()
    print "Done restarting cluster."

    post_startup(CLUSTER, hdfs_service, oozie_service)
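The init_hive helper referenced above is not part of this listing. A minimal sketch is given below; it assumes the cm_api service commands create_hive_metastore_tables(), create_hive_warehouse() and create_hive_userdir() are available in your cm_api version, so verify those method names before relying on them.

def init_hive(hive_service):
    # Run the Hive bootstrap commands in order, waiting for each ApiCommand to finish.
    for step in ('create_hive_metastore_tables',
                 'create_hive_warehouse',
                 'create_hive_userdir'):
        command = getattr(hive_service, step)().wait()
        if not command.success:
            print "Hive initialization step " + step + " failed"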
Ejemplo n.º 43
0
    'firehose_database_name': 'firehose'
}

logging.info('Updating role configurations')
for group in mgmt.get_all_role_config_groups():
    if group.roleType == "HOSTMONITOR":
        group.update_config(mgmt_hm_config)
    if group.roleType == "SERVICEMONITOR":
        group.update_config(mgmt_hm_config)

logging.info('Starting the Cloudera Manager service')
mgmt.start().wait()

# Update the Parcels repo
logging.info('Updating the remote parcels repo')
cm_config = api.get_cloudera_manager().get_config(view='full')
repo_urls = cdh_parcel_repo + ',' + kafka_parcel_repo
api.get_cloudera_manager().update_config(
    {'REMOTE_PARCEL_REPO_URLS': repo_urls})
time.sleep(10)

# Download the CDH Parcel
logging.info('Downloading the CDH parcel')
cluster_name = 'Open Data Platform'
cluster = api.create_cluster(cluster_name, version='CDH5')
cluster.add_hosts(hosts)
cdh_parcel = cluster.get_parcel('CDH', cdh_parcel_version)
cdh_parcel.start_download()
while True:
    cdh_parcel = cluster.get_parcel('CDH', cdh_parcel_version)
    if cdh_parcel.stage == 'DOWNLOADED':
        break
    time.sleep(5)

#!/usr/bin/env python
import socket
import time
from cm_api.api_client import ApiResource
#initialize

hosts = [ ]
cm_host = "cloudera-pe-cm01"
api = ApiResource(cm_host, username="******", password="******")

# Distribute the CDH parcel

parcel_repo = 'http://archive.cloudera.com/cdh5/parcels/5.2.0'
#parcel_repo = 'http://archive.cloudera.com/cdh5/parcels/5.1.3/'
cm_config = api.get_cloudera_manager().get_config(view='full')
repo_config = cm_config['REMOTE_PARCEL_REPO_URLS']
value = repo_config.value or repo_config.default
value += ',' + parcel_repo
api.get_cloudera_manager().update_config({'REMOTE_PARCEL_REPO_URLS': value})
time.sleep(10)

# create cluster, add the hosts
cluster = api.create_cluster("cloudera-pe-test", "CDH5")
#api.create_host("master", "ip-10-238-154-140", "10.238.154.140")
#api.create_host("w01", "ip-10-143-183-98", "10.143.183.98")
#api.create_host("w02", "ip-10-140-38-88", "10.140.38.88")
#api.create_host("w03", "ip-10-140-28-243", "10.140.28.243")
#hosts.append("master")
#hosts.append("w01")
#hosts.append("w02")
#hosts.append("w03")
Ejemplo n.º 45
0
def create_cluster(config_dict):
    config.read(['./conf/hadrian.ini','./conf/cluster_specs.ini', './conf/cloudera-manager/cm.ini'])
    
    
    cm_cluster_name = config_grabber("Globals")['cm.cluster.name']
    cm_username = config_grabber("Globals")['cm.username']
    cm_password = config_grabber("Globals")['cm.password']
    cm_port = config_grabber("Globals")['cm.port']
    version = config_grabber('Globals')['cdh.cluster.version']
    cm_server = config_grabber(cm_cluster_name + '-en')['cm.server']
    
    #Grab all configuration files in the directory with the CM Cluster Name.
    
    for i in os.listdir('./conf/' + cm_cluster_name):
        config.read('./conf/' + cm_cluster_name + '/' + i)
    
    all_nodes = list()

    while (get_cm_status(cm_server + ':' + cm_port) != 200):
        print 'Waiting for CM Server to start... '
        time.sleep(15)
    
    api = ApiResource(cm_server, cm_port, cm_username, cm_password)
    # create cluster
    cluster = api.create_cluster(cm_cluster_name, version.upper())
    
    #Config CM
    print 'Applying any configuration changes to Cloudera Manager'
    cmanager = api.get_cloudera_manager()
    cmanager.update_config(config_grabber('cloudera-manager-updates'))
        
    planned_nodes = config_grabber(cm_cluster_name + '-en')['full.list'].split(',')
    for k, v in config_grabber(cm_cluster_name + '-dn').iteritems():
        for j in v.split(','):
            planned_nodes.append(j)
    
    # TODO make this smarter.  show which agents haven't checked in.  Add the option to continue without them.
    if len(api.get_all_hosts()) != len(planned_nodes):
        print 'Waiting for all agents to check into the CM Server before continuing.'
        
        while len(planned_nodes) > len(api.get_all_hosts()):
            print 'Waiting for the final set of CM Agent nodes to check in.' 
            time.sleep(5)
        
    print 'Updating Rack configuration for data nodes.'
    all_hosts = list()
    for host in api.get_all_hosts():
        all_hosts.append(host.hostId)
        for k,v in config_grabber(cm_cluster_name + '-dn').iteritems():
            if host.hostname in v:
                print 'Setting host: ' + host.hostname + ' to rack /default/' + k
                host.set_rack_id('/default/' + k)
    
    print 'Adding all hosts to cluster.'
    cluster.add_hosts(all_hosts)

    # download CDH Parcels
    # TODO add some logic here to make the parcel list something that's read from the hadrian.ini
    # This will allow support for other CDH packages, Search, etc.
    if config_grabber('Globals')['cdh.distribution.method'] == 'parcels':
        distribute_parcel(cluster, 'CDH', config_grabber("Globals")['cdh.parcel.version'])
    
    if config_dict.get('hdfs_ha') == True:
        create_zookeeper_service(config_dict, cluster)
    create_hdfs_service(config_dict, cluster)    

    cmd = cluster.deploy_client_config()
    if not cmd.wait(CMD_TIMEOUT).success:
        print 'Failed to deploy client configurations'
    else:
        print 'Client configuration deployment complete.'

    create_mapred_service(config_dict, cluster, cm_server)
    if config_dict.get('hbase') == True:
        if config_dict.get('hdfs_ha') == False:
            create_zookeeper_service(config_dict, cluster)
        create_hbase_service(config_dict, cluster)
    if config_dict.get('hive') == True:
        create_hive_service(config_dict, cluster)
    print 'Starting final client configuration deployment for all services.'
    cmd = cluster.deploy_client_config()
    if not cmd.wait(CMD_TIMEOUT).success:
        print 'Failed to deploy client configuration.'
    else:
        print 'Client configuration deployment complete.  The cluster is all yours.  Happy Hadooping.'
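create_cluster() relies on a distribute_parcel() helper that is defined elsewhere. The following hedged sketch shows one way to implement it with the parcel API used throughout these examples; the polling interval and the exact signature are assumptions.

import time

def distribute_parcel(cluster, product, parcel_version, poll_sec=10):
    # Assumes the parcel repo has just been registered, so the parcel starts
    # AVAILABLE_REMOTELY. Walk it through DOWNLOADED -> DISTRIBUTED -> ACTIVATED,
    # re-fetching between polls because parcel objects are point-in-time snapshots.
    for trigger, target_stage in (('start_download', 'DOWNLOADED'),
                                  ('start_distribution', 'DISTRIBUTED'),
                                  ('activate', 'ACTIVATED')):
        parcel = cluster.get_parcel(product, parcel_version)
        if parcel.stage == target_stage:
            continue
        getattr(parcel, trigger)()
        while parcel.stage != target_stage:
            time.sleep(poll_sec)
            parcel = cluster.get_parcel(product, parcel_version)
    print 'Parcel ' + product + ' ' + parcel_version + ' activated.'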
def main():
    API = ApiResource(cm_config.CM_HOST,
                      version=16,
                      username=cm_config.ADMIN_USER,
                      password=cm_config.ADMIN_PASSWD)
    MANAGER = API.get_cloudera_manager()
    MANAGER.update_config(cm_config.CM_CONFIG)
    print "Connected to CM host on " + cm_config.CM_HOST + " and updated CM configuration"

    CLUSTER = init_cluster(API, cm_config.CLUSTER_NAME, cm_config.CDH_VERSION,
                           cm_config.CLUSTER_HOSTS, cm_config.CM_HOST)
    print "Initialized cluster " + cm_config.CLUSTER_NAME + " which uses CDH version " + cm_config.CDH_VERSION

    deploy_management(MANAGER, cm_config.MGMT_SERVICENAME,
                      cm_config.MGMT_SERVICE_CONFIG,
                      cm_config.MGMT_ROLE_CONFIG, cm_config.AMON_ROLENAME,
                      cm_config.AMON_ROLE_CONFIG, cm_config.APUB_ROLENAME,
                      cm_config.APUB_ROLE_CONFIG, cm_config.ESERV_ROLENAME,
                      cm_config.ESERV_ROLE_CONFIG, cm_config.HMON_ROLENAME,
                      cm_config.HMON_ROLE_CONFIG, cm_config.SMON_ROLENAME,
                      cm_config.SMON_ROLE_CONFIG, cm_config.RMAN_ROLENAME,
                      cm_config.RMAN_ROLE_CONFIG)
    print "Deployed CM management service " + cm_config.MGMT_SERVICENAME + " to run on " + cm_config.CM_HOST

    deploy_parcels(CLUSTER, cm_config.PARCELS)
    print "Downloaded and distributed parcels: "
    pretty_print(cm_config.PARCELS)

    zookeeper_service = deploy_zookeeper(CLUSTER,
                                         cm_config.ZOOKEEPER_SERVICE_NAME,
                                         cm_config.ZOOKEEPER_SERVER_HOSTS,
                                         cm_config.ZOOKEEPER_SERVICE_CONFIG,
                                         cm_config.ZOOKEEPER_ROLE_CONFIG)
    print "Deployed ZooKeeper " + cm_config.ZOOKEEPER_SERVICE_NAME + " to run on: "
    pretty_print(cm_config.ZOOKEEPER_SERVER_HOSTS)

    hdfs_service = deploy_hdfs(
        CLUSTER, cm_config.HDFS_SERVICE_NAME, cm_config.HDFS_SERVICE_CONFIG,
        cm_config.HDFS_NAMENODE_SERVICE_NAME, cm_config.HDFS_NAMENODE_HOST,
        cm_config.HDFS_NAMENODE_CONFIG, cm_config.HDFS_SECONDARY_NAMENODE_HOST,
        cm_config.HDFS_SECONDARY_NAMENODE_CONFIG,
        cm_config.HDFS_DATANODE_HOSTS, cm_config.HDFS_DATANODE_CONFIG,
        cm_config.HDFS_GATEWAY_HOSTS, cm_config.HDFS_GATEWAY_CONFIG)
    print "Deployed HDFS service " + cm_config.HDFS_SERVICE_NAME + " using NameNode on " + cm_config.HDFS_NAMENODE_HOST + ", SecondaryNameNode on " + cm_config.HDFS_SECONDARY_NAMENODE_HOST + ", and DataNodes running on: "
    pretty_print(cm_config.HDFS_DATANODE_HOSTS)
    init_hdfs(hdfs_service, cm_config.HDFS_SERVICE_NAME, cm_config.CMD_TIMEOUT)
    print "Initialized HDFS service"

    #     mapred and yarn are mutually exclusive; only deploy one of them
    #     mapred_service = deploy_mapreduce(CLUSTER, MAPRED_SERVICE_NAME, MAPRED_SERVICE_CONFIG, MAPRED_JT_HOST, MAPRED_JT_CONFIG, MAPRED_TT_HOSTS, MAPRED_TT_CONFIG, MAPRED_GW_HOSTS, MAPRED_GW_CONFIG)
    #     print "Deployed MapReduce service " + cm_config.MAPRED_SERVICE_NAME + " using JobTracker on " + cm_config.MAPRED_JT_HOST + " and TaskTrackers running on "
    #     pretty_print(cm_config.MAPRED_TT_HOSTS)

    yarn_service = deploy_yarn(
        CLUSTER, cm_config.YARN_SERVICE_NAME, cm_config.YARN_SERVICE_CONFIG,
        cm_config.YARN_RM_HOST, cm_config.YARN_RM_CONFIG,
        cm_config.YARN_JHS_HOST, cm_config.YARN_JHS_CONFIG,
        cm_config.YARN_NM_HOSTS, cm_config.YARN_NM_CONFIG,
        cm_config.YARN_GW_HOSTS, cm_config.YARN_GW_CONFIG)
    print "Deployed YARN service " + cm_config.YARN_SERVICE_NAME + " using ResourceManager on " + cm_config.YARN_RM_HOST + ", JobHistoryServer on " + cm_config.YARN_JHS_HOST + ", and NodeManagers on "
    pretty_print(cm_config.YARN_NM_HOSTS)

    spark_service = deploy_spark(CLUSTER, cm_config.SPARK_SERVICE_NAME,
                                 cm_config.SPARK_SERVICE_CONFIG,
                                 cm_config.SPARK_YARN_HISTORY_SERVER_HOST,
                                 cm_config.SPARK_YARN_HISTORY_SERVER_CONFIG,
                                 cm_config.SPARK_GATEWAY_HOST,
                                 cm_config.SPARK_GATEWAY_CONFIG)
    print "Deployed SPARK service " + cm_config.SPARK_SERVICE_NAME + " using SparkHistoryServer on " + cm_config.SPARK_YARN_HISTORY_SERVER_HOST + " and Spark Gateway on "
    pretty_print(cm_config.SPARK_GATEWAY_HOST)

    deploy_hbase(CLUSTER, cm_config.HBASE_SERVICE_NAME,
                 cm_config.HBASE_SERVICE_CONFIG, cm_config.HBASE_HM_HOST,
                 cm_config.HBASE_HM_CONFIG, cm_config.HBASE_RS_HOSTS,
                 cm_config.HBASE_RS_CONFIG)
    print "Deployed HBase service " + cm_config.HBASE_SERVICE_NAME + " using HMaster on " + cm_config.HBASE_HM_HOST + " and RegionServers on "
    pretty_print(cm_config.HBASE_RS_HOSTS)

    hive_service = deploy_hive(
        CLUSTER, cm_config.HIVE_SERVICE_NAME, cm_config.HIVE_SERVICE_CONFIG,
        cm_config.HIVE_HMS_HOST, cm_config.HIVE_HMS_CONFIG,
        cm_config.HIVE_HS2_HOST, cm_config.HIVE_HS2_CONFIG,
        cm_config.HIVE_WHC_HOST, cm_config.HIVE_WHC_CONFIG,
        cm_config.HIVE_GW_HOSTS, cm_config.HIVE_GW_CONFIG)
    print "Depoyed Hive service " + cm_config.HIVE_SERVICE_NAME + " using HiveMetastoreServer on " + cm_config.HIVE_HMS_HOST + " and HiveServer2 on " + cm_config.HIVE_HS2_HOST
    hive_service = CLUSTER.get_service("HIVE")
    hive_mysqldb_deploy()
    init_hive(hive_service)
    print "Initialized Hive service"

    impala_service = deploy_impala(
        CLUSTER, cm_config.IMPALA_SERVICE_NAME,
        cm_config.IMPALA_SERVICE_CONFIG, cm_config.IMPALA_SS_HOST,
        cm_config.IMPALA_SS_CONFIG, cm_config.IMPALA_CS_HOST,
        cm_config.IMPALA_CS_CONFIG, cm_config.IMPALA_ID_HOSTS,
        cm_config.IMPALA_ID_CONFIG)
    print "Deployed Impala service " + cm_config.IMPALA_SERVICE_NAME + " using StateStore on " + cm_config.IMPALA_SS_HOST + ", CatalogServer on " + cm_config.IMPALA_CS_HOST + ", and ImpalaDaemons on "
    pretty_print(cm_config.IMPALA_ID_HOSTS)

    kafka_service = deploy_kafka(CLUSTER, cm_config.KAFKA_SERVICE_NAME,
                                 cm_config.KAFKA_SERVICE_CONFIG,
                                 cm_config.KAFKA_BROKER_HOSTS,
                                 cm_config.KAFKA_BROKER_CONFIG)
    print "Deployed Kafka service :" + cm_config.KAFKA_SERVICE_NAME + " using Broker on"
    pretty_print(cm_config.KAFKA_BROKER_HOSTS)

    #Need to start the cluster now as subsequent services need the cluster to be running
    #TODO can we just start ZK, and maybe HDFS, instead of everything? It's just needed for the search service
    #     CLUSTER.first_run().wait()
    print("Deploy client config")
    CLUSTER.deploy_client_config().wait()
    print("Start hdfs,zookeeper service")
    zookeeper_service.start().wait()
    hdfs_service.start().wait()
    time.sleep(20)
    print("Create spark applicationHistory directory")
    comand="ssh -p %s root@%s 'sudo -u hdfs hadoop fs -mkdir -p /user/spark/applicationHistory && "\
           "sudo -u hdfs hadoop fs -chmod 777 /user/spark/applicationHistory && "\
           "sudo -u hdfs hadoop fs -chown -R  spark:spark /user/spark ' >/dev/null 2>&1 ;echo $?"%\
           (cm_config.NAME_NODE_HOST_INFO[3],cm_config.NAME_NODE_HOST_INFO[1])
    shell_command(comand)
    print "About to restart cluster"
    CLUSTER.restart().wait()
    #     CLUSTER.restart(redeploy_client_configuration=True).wait()
    print "Done restarting cluster"

    hdfs_service = CLUSTER.get_service("HDFS")
    post_startup(CLUSTER, hdfs_service)
    hive_service.restart().wait()
    impala_service.restart().wait()
    print "Finished deploying Cloudera cluster. Go to http://" + cm_config.CM_HOST + ":7180 to administer the cluster."
    print "If there are any other services not running, restart them now."