Example #1
def main():
    """
    Kerberizes a cluster.

    @rtype:   number
    @returns: A number representing the status of success.
    """
    settings = retrieve_args()

    api = ApiResource(settings.host, settings.port, settings.username,
                      settings.password, settings.use_tls, 8)

    cloudera_manager = api.get_cloudera_manager()
    cluster = api.get_cluster(settings.cluster)
    mgmt_service = cloudera_manager.get_service()

    if verify_cloudera_manager_has_kerberos_principal(cloudera_manager):
        wait_for_command('Stopping the cluster', cluster.stop())
        wait_for_command('Stopping MGMT services', mgmt_service.stop())
        configure_services(cluster)
        wait_for_generate_credentials(cloudera_manager)
        wait_for_command('Deploying client configs.',
                         cluster.deploy_client_config())
        wait_for_command('Deploying cluster client configs',
                         cluster.deploy_cluster_client_config())
        wait_for_command('Starting MGMT services', mgmt_service.start())
        wait_for_command('Starting the cluster', cluster.start())
    else:
        print "Cluster does not have Kerberos admin credentials.  Exiting!"

    return 0
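The main() above (and several of the later examples) leans on a wait_for_command helper that is not included in the snippet. A minimal sketch, assuming cm_api's ApiCommand.wait()/success/resultMessage semantics; the helper name and call signature are inferred from the usage above, not taken from a published module:

def wait_for_command(description, command, timeout=None):
    # Hypothetical helper: block until the ApiCommand finishes and raise on failure.
    print description + ' ...'
    command = command.wait(timeout)
    if not command.success:
        raise Exception(description + ' failed: ' + str(command.resultMessage))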
def create_cluster():
    cfg = load_cfg(ansible_path + "/group_vars/all")
    CM_HOST = cfg.get("cm_host")
    USERNAME = cfg.get("cm_username")
    PASSWORD = cfg.get("cm_password")
    api = ApiResource(CM_HOST, version=API_VERSION, username=USERNAME, password=PASSWORD)
    cluster = api.get_cluster(CLUSTER_NAME)
    return cluster
def main():
    module = build_module()
    choice_map = {'present': present, 'distributed': distributed, 'activated': activated, 'absent': absent, 'infos': infos}
    params = module.params
    has_changed = False

    api = ApiResource(params["cm_host"], username=params["cm_login"], password=params["cm_password"], version=params["api_version"])

    try:
        cluster = api.get_cluster(params["cluster_name"])
    except ApiException as e:
        module.fail_json(msg="Cluster error : {0}".format(e))

    if params["product"] and params["version"]:
        parcel = get_parcel(cluster, params["product"], params["version"])
        if params["state"] != "infos":
            error, has_changed, result, meta = choice_map.get(params['state'])(cluster, parcel)

            if error:
                module.fail_json(msg=result)
            module.exit_json(changed=has_changed, msg=result, meta=meta)
        else:
            meta = {
                "product": parcel.product,
                "version": parcel.version,
                "stage": parcel.stage
            }
            module.exit_json(changed=False, msg="Parcel information gathered", meta=meta)
    elif not params["product"] and not params["version"] and params["state"] == "infos":
        module.exit_json(changed=has_changed, msg="Parcel information gathered", meta=infos(cluster))
Example #4
def main():
    """
    Kerberizes a cluster.

    @rtype:   number
    @returns: A number representing the status of success.
    """
    settings = retrieve_args()

    api = ApiResource(settings.host, settings.port, settings.username,
                      settings.password, settings.use_tls, 8)

    cloudera_manager = api.get_cloudera_manager()
    cluster = api.get_cluster(settings.cluster)
    mgmt_service = cloudera_manager.get_service()

    if verify_cloudera_manager_has_kerberos_principal(cloudera_manager):
        wait_for_command('Stopping the cluster', cluster.stop())
        wait_for_command('Stopping MGMT services', mgmt_service.stop())
        configure_services(cluster)
        wait_for_generate_credentials(cloudera_manager)
        wait_for_command('Deploying client configs.', cluster.deploy_client_config())
        wait_for_command('Deploying cluster client configs', cluster.deploy_cluster_client_config())
        wait_for_command('Starting MGMT services', mgmt_service.start())
        wait_for_command('Starting the cluster', cluster.start())
    else:
        print "Cluster does not have Kerberos admin credentials.  Exiting!"

    return 0
Example #5
def main():
    #print sys.argv[0]
    #for i in range(1, len(sys.argv)):
    #    print "param ", i, sys.argv[i]

    # get a handle on the instance of CM that we have running
    api = ApiResource(cm_host, cm_port, cm_username, cm_password, version=13)

    # get the CM instance
    cm = ClouderaManager(api)

    cluster = api.get_cluster(cluster_name)

    # distribution_parcels(api, cluster)

    cmd = cluster.first_run()

    while cmd.success == None:
        cmd = cmd.fetch()

    if not cmd.success:
        print "The first run command failed: " + cmd.resultMessage
        exit(0)

    print "First run successfully executed. Your cluster has been set up!"
Example #6
class ImpalaCluster(object):
  def __init__(self, cm_host, cm_cluster_name, username, password):
    self.cm_api = ApiResource(cm_host, username=username, password=password)
    self.hosts = dict()
    self.services = list()
    self.cluster = self.cm_api.get_cluster(cm_cluster_name)
    if self.cluster is None:
      raise RuntimeError, 'Cluster name "%s" not found' % cm_cluster_name

    self.__load_hosts()
    self.__impala_service = ImpalaService(self)

  def _get_all_services(self):
    return self.cluster.get_all_services()

  def get_impala_service(self):
    return self.__impala_service

  def __load_hosts(self):
    self.hosts = dict()
    # Search for all hosts that are in the target cluster.
    # There is no API that provides the list of host in a given cluster, so to find them
    # we must loop through all the hosts and check the cluster name matches.
    for host_info in self.cm_api.get_all_hosts():
      # host_info doesn't include a link to the roleRef so need to do another lookup
      # based on the hostId.
      host = self.cm_api.get_host(host_info.hostId)
      for roleRef in host.roleRefs:
        if roleRef.get('clusterName') == self.cluster.name:
          self.hosts[host_info.hostId] = Host(host)
          break
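The scan above iterates every host in the deployment; later examples in this collection (the ClouderaManager class and the Ansible module near the end) instead call cluster.list_hosts(), which is already scoped to the cluster. A minimal alternative sketch of __load_hosts built on that call, assuming the same Host wrapper:

  def __load_hosts(self):
    self.hosts = dict()
    # list_hosts() returns host references scoped to this cluster, so no cluster-name
    # filtering is needed; a second lookup fetches the full host details.
    for host_ref in self.cluster.list_hosts():
      host = self.cm_api.get_host(host_ref.hostId)
      self.hosts[host_ref.hostId] = Host(host)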
Example #7
def main():
    # connect cm api
    api = ApiResource(CM_HOST,
                      7180,
                      username=CM_USERNAME,
                      password=CM_PASSWORD)
    manager = api.get_cloudera_manager()
    # no need to update cm config
    #manager.update_config(cm_host)
    print("[INFO] Connected to CM host on " + CM_HOST)

    # create cluster object
    try:
        cluster = api.get_cluster(name=CLUSTER_NAME)
    except:
        cluster = init_cluster(api, CLUSTER_NAME, CLUSTER_VERSION,
                               CLUSTER_NODE_COUNT)
    print("[INFO] Initialized cluster " + CLUSTER_NAME +
          " which uses CDH version " + CLUSTER_VERSION)

    #
    mgmt_servicename = "MGMT"
    amon_role_name = "ACTIVITYMONITOR"
    apub_role_name = "ALERTPUBLISHER"
    eserv_role_name = "EVENTSERVER"
    hmon_role_name = "HOSTMONITOR"
    smon_role_name = "SERVICEMONITOR"
    nav_role_name = "NAVIGATOR"
    navms_role_name = "NAVIGATORMETADATASERVER"
    rman_role_name = "REPORTMANAGER"
    deploy_management(manager, mgmt_servicename, amon_role_name,
                      apub_role_name, eserv_role_name, hmon_role_name,
                      smon_role_name, nav_role_name, navms_role_name,
                      rman_role_name)
    print("[INFO] Deployed CM management service " + mgmt_servicename +
          " to run on " + CM_HOST)

    #
    assign_roles(api, cluster)
    print("[INFO] All roles have been assigned.")

    #
    # Custom role config groups cannot be automatically configured: Gateway Group 1 (error 400)
    try:
        cluster.auto_configure()
    except:
        pass
    update_custom_config(api, cluster)
    print("[INFO] All services and roles have been configured.")
    #
    cmd = cluster.first_run()
    while cmd.success == None:
        cmd = cmd.fetch()
    if not cmd.success:
        print("[ERROR] The first run command failed: " + cmd.resultMessage)
    else:
        print(
            "[INFO] First run successfully executed. Your cluster has been set up!"
        )
Example #8
def main():
   API = ApiResource(CM_HOST, version=5, username=ADMIN_USER, password=ADMIN_PASS)
   print "Connected to CM host on " + CM_HOST

   CLUSTER = API.get_cluster(CLUSTER_NAME)

   print "About to stop cluster."
   CLUSTER.stop().wait()
   print "Done stopping cluster."
Example #9
def main():
   API = ApiResource(CM_HOST, version=5, username=ADMIN_USER, password=ADMIN_PASS)
   print "Connected to CM host on " + CM_HOST

   CLUSTER = API.get_cluster(CLUSTER_NAME)

   print "About to restart cluster."
   CLUSTER.restart().wait()
   print "Done restarting cluster."
def do_call(host, port, version, user, password, cluster_name, parcel_name, parcel_version, parcel_repo, init_pre_dir, init_post_dir):
    api = ApiResource(host, port, user, password, False, version)
    if not parcel_repo.endswith('/'):
        parcel_repo += '/'
    if re.match(REGEX_VERSION, parcel_version) is None or re.match(REGEX_VERSION, parcel_version).group() != parcel_version:
        raise Exception('Parcel [' + parcel_name + '] is qualified by invalid version [' + parcel_version + '] expected to match regular expression [' + REGEX_VERSION + ']')
    if not parcel_repo.endswith(parcel_version + '/'):
        raise Exception('Parcel [' + parcel_name + '] is qualified by invalid version [' + parcel_version + '] when compared with repository [' + parcel_repo + ']')    
    cm_config = api.get_cloudera_manager().get_config(view='full')
    repo_config = cm_config['REMOTE_PARCEL_REPO_URLS']
    repo_list = repo_config.value or repo_config.default
    if parcel_repo not in repo_list:     
        repo_list += ',' + parcel_repo
        api.get_cloudera_manager().update_config({'REMOTE_PARCEL_REPO_URLS': repo_list})
        time.sleep(POLL_SEC)  # The parcel synchronize end-point is not exposed via the API, so sleep instead
    cluster_names = []
    if cluster_name is None:
        for cluster in api.get_all_clusters():
            cluster_names.append(cluster.name)
    else:
        cluster_names.append(cluster_name)
    for cluster_name_itr in cluster_names:
        print 'Cluster [DEPLOYMENT] starting ... '
        cluster = api.get_cluster(cluster_name_itr)
        parcel = cluster.get_parcel(parcel_name, parcel_version)
        print 'Parcel [DEPLOYMENT] starting ... '
        do_parcel_op(cluster, parcel_name, parcel_version, 'DOWNLOAD', 'AVAILABLE_REMOTELY', 'DOWNLOADED', 'start_download')
        do_parcel_op(cluster, parcel_name, parcel_version, 'DISTRIBUTE', 'DOWNLOADED', 'DISTRIBUTED', 'start_distribution')
        do_parcel_op(cluster, parcel_name, parcel_version, 'ACTIVATE', 'DISTRIBUTED', 'ACTIVATED', 'activate')
        parcel = cluster.get_parcel(parcel_name, parcel_version)
        if parcel.stage != 'ACTIVATED':
            raise Exception('Parcel is currently mid-stage [' + parcel.stage + '], please wait for this to complete')
        print 'Parcel [DEPLOYMENT] finished'
        if init_pre_dir is not None and os.path.isdir(init_pre_dir):
            print 'Cluster [PRE_INIT] starting ... '
            for script in glob.glob(init_pre_dir + '/*.sh'):
                subprocess.call([script])
            print 'Cluster [PRE_INIT] finished'
        print 'Cluster [CONFIG_DEPLOYMENT] starting ... '
        cmd = cluster.deploy_client_config()
        if not cmd.wait(TIMEOUT_SEC).success:
            raise Exception('Failed to deploy client configs')
        print 'Cluster [CONFIG_DEPLOYMENT] finished'
        print 'Cluster [STOP] starting ... '
        cluster.stop().wait()
        print 'Cluster [STOP] finished'
        print 'Cluster [START] starting ... '
        cluster.start().wait()
        print 'Cluster [START] finished'
        if init_post_dir is not None and os.path.isdir(init_post_dir):
            print 'Cluster [POST_INIT] starting ... '
            for script in glob.glob(init_post_dir + '/*.sh'):
                subprocess.call([script])
            print 'Cluster [POST_INIT] finished'
        print 'Cluster [DEPLOYMENT] finished'
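do_call() above drives each parcel transition through a do_parcel_op helper that is not part of the snippet. A minimal sketch, assuming cm_api's ApiParcel stage/state attributes and the start_download/start_distribution/activate methods shown in the other examples; the helper name and argument order are inferred from the calls above:

def do_parcel_op(cluster, parcel_name, parcel_version, label, stage_from, stage_to, method):
    # Hypothetical helper: kick off one parcel stage transition and poll until it lands.
    parcel = cluster.get_parcel(parcel_name, parcel_version)
    if parcel.stage == stage_from:
        print 'Parcel [' + label + '] starting ... '
        getattr(parcel, method)()
        while parcel.stage != stage_to:
            if parcel.state.errors:
                raise Exception(str(parcel.state.errors))
            time.sleep(POLL_SEC)
            parcel = cluster.get_parcel(parcel_name, parcel_version)
        print 'Parcel [' + label + '] finished'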
def main():
    """
    Add peer to the cluster.
    @rtype:   number
    @returns: A number representing the status of success.
    """
    settings = parse_args()
    if len(sys.argv) == 1 or len(sys.argv) > 17:
        print_usage_message()
        quit(1)

    api = ApiResource(settings.server, settings.port, settings.username,
                      settings.password, settings.use_tls, 14)

    yarn_service = get_service_name('YARN', api, settings.cluster_name)
    hdfs_name = get_service_name('HDFS', api, settings.cluster_name)

    hdfs = api.get_cluster(settings.cluster_name).get_service(hdfs_name)

    hdfs_cloud_args = ApiHdfsCloudReplicationArguments(None)
    hdfs_cloud_args.sourceService = ApiServiceRef(None,
                                                  peerName=None,
                                                  clusterName=settings.cluster_name,
                                                  serviceName=hdfs_name)
    hdfs_cloud_args.sourcePath = settings.source_path
    hdfs_cloud_args.destinationPath = settings.target_path
    hdfs_cloud_args.destinationAccount = settings.account_name
    hdfs_cloud_args.mapreduceServiceName = yarn_service

    # creating a schedule with daily frequency
    start = datetime.datetime.now()
    # The time at which the scheduled activity is triggered for the first time.
    end = start + datetime.timedelta(days=365)
    # The time after which the scheduled activity will no longer be triggered.

    schedule = hdfs.create_replication_schedule(start, end, "DAY", 1, True, hdfs_cloud_args)

    ## Updating the Schedule's properties
    # schedule.hdfsArguments.removeMissingFiles = False
    schedule.alertOnFail = True
    schedule = hdfs.update_replication_schedule(schedule.id, schedule)

    print "Schedule created with Schedule ID: " + str(schedule.id)
    # print schedule.alertOnFail
    # print schedule.hdfsArguments.removeMissingFiles
    # print schedule.hdfsArguments.sourcePath
    # print schedule.hdfsArguments.preserveXAttrs
    # print schedule.hdfsArguments.exclusionFilters
    # print schedule.hdfsArguments.replicationStrategy
    # print schedule.hdfsArguments.numMaps
    # print schedule.hdfsArguments.userName
    # print schedule.hdfsArguments.schedulerPoolName
    # print type(schedule)

    return 0
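This example and the later replication-trigger example both call a get_service_name helper that is not shown. A minimal sketch, assuming it simply returns the name of the first service of the requested type in the named cluster; the name and argument order are inferred from the calls above:

def get_service_name(service_type, api, cluster_name):
    # Hypothetical helper: look up the first service of the given type in the cluster.
    for service in api.get_cluster(cluster_name).get_all_services():
        if service.type == service_type:
            return service.name
    return None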
Example #12
def main():
    #print sys.argv[0]
    #for i in range(1, len(sys.argv)):
    #    print "param ", i, sys.argv[i]

    # get a handle on the instance of CM that we have running
    api = ApiResource(cm_host, cm_port, cm_username, cm_password, version=13)

    # get the CM instance
    cm = ClouderaManager(api)

    cluster = api.get_cluster(cluster_name)
    distribution_parcels(api, cluster)
Example #13
def main():
    # connect cm api
    api = ApiResource(CM_HOST, 7180, username=CM_USERNAME, password=CM_PASSWORD)
    manager = api.get_cloudera_manager()
    # no need to update cm config
    #manager.update_config(cm_host)
    print("[INFO] Connected to CM host on " + CM_HOST)

    # create cluster object
    try:
        cluster = api.get_cluster(name=CLUSTER_NAME)
    except:
        cluster = init_cluster(api, CLUSTER_NAME, CLUSTER_VERSION, CLUSTER_NODE_COUNT)
    print("[INFO] Initialized cluster " + CLUSTER_NAME + " which uses CDH version " + CLUSTER_VERSION)

    #
    mgmt_servicename = "MGMT"
    amon_role_name = "ACTIVITYMONITOR"
    apub_role_name = "ALERTPUBLISHER"
    eserv_role_name = "EVENTSERVER"
    hmon_role_name = "HOSTMONITOR"
    smon_role_name = "SERVICEMONITOR"
    nav_role_name = "NAVIGATOR"
    navms_role_name = "NAVIGATORMETADATASERVER"
    rman_role_name = "REPORTMANAGER"
    deploy_management(manager, mgmt_servicename, amon_role_name, apub_role_name, eserv_role_name, hmon_role_name, smon_role_name, nav_role_name, navms_role_name, rman_role_name)
    print("[INFO] Deployed CM management service " + mgmt_servicename + " to run on " + CM_HOST)

    #
    assign_roles(api, cluster)
    print("[INFO] All roles have been assigned.")

    #
    # Custom role config groups cannot be automatically configured: Gateway Group 1 (error 400)
    try:
        cluster.auto_configure()
    except:
        pass
    update_custom_config(api, cluster)
    print("[INFO] All services and roles have been configured.")
    #
    cmd = cluster.first_run()
    while cmd.success == None:
        cmd = cmd.fetch()
    if not cmd.success:
        print("[ERROR] The first run command failed: " + cmd.resultMessage)
    else:
        print("[INFO] First run successfully executed. Your cluster has been set up!")
Example #14
def main():
    #script options
    parser = OptionParser()
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False,
                      help='Be more verbose')
    parser.add_option('-m', '--manager', dest='manager',
                      help='CDH manager address',)
    parser.add_option('-a', '--action', type='choice', action='store', dest='action',
                      choices=['manager', 'service_list', 'service_health'], default='manager',
                      help='Action to take')
    parser.add_option('-n', '--name', dest='name',
                      help='Name of the item to check',)
    (options, args) = parser.parse_args()

    #logging
    logging.basicConfig()
    if not options.manager:
        print 'You must specify a manager address'
        sys.exit(-1)
    if options.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    api = ApiResource(options.manager, username=ADMIN_USER, password=ADMIN_PASS, version=9)
    try:
        cluster = api.get_cluster(CLUSTER_NAME)
        if options.action == 'manager':
            print 'OK'
            sys.exit(0)
    except urllib2.URLError:
        print 'Could not connect to API'
        sys.exit(-1)

    if options.action == 'service_list':
        response = {'data': [{'{#SERVICENAME}': svc.name} for svc in cluster.get_all_services()]}
        print json.dumps(response)
    elif options.action == 'service_health':
        if not options.name:
            print 'Must specify a name for this check'
            sys.exit(-1)
        svc = cluster.get_service(options.name)
        if svc.healthSummary == 'GOOD':
            print 'OK'
            sys.exit(0)
        else:
            failed_checks = " ".join([
                check['name'] for check in svc.healthChecks if check['summary'] != 'GOOD'
            ])
            print 'Health is {0}. Failed checks: {1}'.format(svc.healthSummary, failed_checks)
Example #15
def main():
    resource = ApiResource("localhost", 7180, "cloudera", "cloudera", version=19)
    cluster = resource.get_cluster("Cloudera Quickstart")

    cm_manager = resource.get_cloudera_manager()
    cm_manager.update_config({'REMOTE_PARCEL_REPO_URLS': PARCEL_REPO})
    cm_manager.update_all_hosts_config(JDK_CONFIG)
    time.sleep(5)

    for parcel in PARCELS:
        ParcelInstaller(parcel['name'], parcel['version']).install(cluster)

    print "Restarting cluster"
    cluster.stop().wait()
    cluster.start().wait()
    print "Done restarting cluster"
Example #16
def main():
    """
    Enables HDFS HA on a cluster.

    @rtype:   number
    @returns: A number representing the status of success.
    """
    settings = retrieve_args()

    api = ApiResource(settings.host,
                      settings.port,
                      settings.username,
                      settings.password,
                      version=6)

    if not validate_cluster(api, settings.cluster):
        write_to_stdout(
            "Cluster does not satisfy preconditions for enabling HDFS HA. Exiting!"
        )
        return 1

    if settings.wait_for_good_health:
        write_to_stdout("Waiting for GOOD health... ")
        if not wait_for_good_health(api, settings.cluster):
            write_to_stdout("Cluster health is not GOOD.  Exiting!\n")
            return 1
    else:
        write_to_stdout("Checking cluster health... ")
        if not check_health(api, settings.cluster):
            write_to_stdout("Cluster health is not GOOD.  Exiting!\n")

    write_to_stdout("Cluster health is GOOD!\n")

    cluster = api.get_cluster(settings.cluster)

    invoke_hdfs_enable_nn_ha(cluster, settings.nameservice)
    update_hive_for_ha_hdfs(cluster)

    # Restarting the MGMT services to make sure the HDFS file browser functions
    # as expected.
    cloudera_manager = api.get_cloudera_manager()
    mgmt_service = cloudera_manager.get_service()
    wait_for_command('Restarting MGMT services', mgmt_service.restart())

    return 0
def main():
    """
    Configures a cluster.
    @rtype:   number
    @returns: A number representing the status of success.
    """
    settings = retrieve_args()

    api = ApiResource(settings.host, settings.port, settings.username,
                      settings.password, settings.use_tls, 8)

    cluster = api.get_cluster(settings.cluster)

    configure_services(cluster)
    wait_for_command('Deploying client configs.',
                     cluster.deploy_client_config())
    wait_for_command('Restarting the cluster', cluster.stop())
    wait_for_command('Restarting the cluster', cluster.start())

    return 0
Example #18
def main():
    """
    Configures a cluster.
    @rtype:   number
    @returns: A number representing the status of success.
    """
    settings = retrieve_args()

    api = ApiResource(settings.host, settings.port, settings.username,
                      settings.password, settings.use_tls, 8)

    cluster = api.get_cluster(settings.cluster)

    #manage_cm_users(api)
    users = ['test1', 'test2']
    for user in users:
        print user
        api.create_user(user, 'nielsen#123', ['ROLE_USER'])

    return 0
class Credentials:
    def __init__(self, host, port, user, passw, version, cluster):
        self.api = ApiResource(str(host),
                               port,
                               username=str(user),
                               password=str(passw),
                               version=int(version))
        cluster1 = self.api.get_cluster(cluster)
        self.service_list = cluster1.get_all_services()
        self.serviceList = [
            "OOZIE", "KS_INDEXER", "SQOOP", "ZOOKEEPER", "HUE", "FLUME",
            "IMPALA", "HDFS", "SOLR", "HBASE", "YARN", "HIVE", "SPARK",
            "SENTRY"
        ]
        self.list = []

        for service in self.service_list:
            if service.type in self.serviceList:
                self.list.append(service)

        self.dictionary = dict(zip(self.serviceList, self.list))
Example #20
def main():
    
    """
    TODO: This probably needs some work.  You get the idea though.
    An example of how to do a bulk config update to Cloudera Manager.  This is helpful if you
    have a bunch of changes that you want to make but don't want to use the GUI.
    """
    
    parser = argparse.ArgumentParser(description='Cloudera Manager Bulk Config Update Script')
    parser.add_argument('-H', '--host', '--hostname', action='store', dest='hostname', required=True, help='CM server host')
    parser.add_argument('-p', '--port', action='store', dest='port', type=int, default=7180, help='example: 7180')
    parser.add_argument('-u', '--user', '--username', action='store', dest='username', required=True, help='example: admin')
    parser.add_argument('-c', '--cluster', action='store', dest='cluster', required=True, help='example: hadrian-cluster')
    args = parser.parse_args() 
    password = getpass.getpass('Please enter your Cloudera Manager password: ')
    
    # read configuration files:
    for i in os.listdir('./conf/' + args.cluster):
        config.read('./conf/' + args.cluster + '/' + i)
    
    api = ApiResource(args.hostname, args.port, args.username, password)
    cluster = api.get_cluster(args.cluster)
    services = cluster.get_all_services()
   
    # update services based with configuration file parameters   
    for service in services:
        if config_grabber.has_section(service.type):
            service.update_config(svc_config=config_grabber(service.name + '-svc-config'))
            config_groups = config_grabber(service.name)['config_groups']
            for config_group in config_groups.split(','):
                print config_group
                temp_config_group = service.get_role_config_group(config_group)
                temp_config_group.update_config(config_grabber(config_group))
        else:
            print 'unknown service: ' + service.name

    print 'Starting final client configuration deployment for all services.'
    cmd = cluster.deploy_client_config()
    if not cmd.wait(CMD_TIMEOUT).success:
        print 'Failed to deploy client configuration.'
Example #21
def main():
    """
    Enables HDFS HA on a cluster.

    @rtype:   number
    @returns: A number representing the status of success.
    """
    settings = retrieve_args()

    api = ApiResource(settings.host, settings.port, settings.username, settings.password,
                      version=6)

    if not validate_cluster(api, settings.cluster):
        write_to_stdout("Cluster does not satisfy preconditions for enabling HDFS HA. Exiting!")
        return 1

    if settings.wait_for_good_health:
        write_to_stdout("Waiting for GOOD health... ")
        if not wait_for_good_health(api, settings.cluster):
            write_to_stdout("Cluster health is not GOOD.  Exiting!\n")
            return 1
    else:
        write_to_stdout("Checking cluster health... ")
        if not check_health(api, settings.cluster):
            write_to_stdout("Cluster health is not GOOD.  Exiting!\n")

    write_to_stdout("Cluster health is GOOD!\n")

    cluster = api.get_cluster(settings.cluster)

    invoke_hdfs_enable_nn_ha(cluster, settings.nameservice)
    update_hive_for_ha_hdfs(cluster)

    # Restarting the MGMT services to make sure the HDFS file browser functions
    # as expected.
    cloudera_manager = api.get_cloudera_manager()
    mgmt_service = cloudera_manager.get_service()
    wait_for_command('Restarting MGMT services', mgmt_service.restart())

    return 0
Example #22
def main():
    """
    This is an example script for printing the default configurations for a CM service.
    It's rough, but it gets the job done.  This is how you can see all of the settings
    you've made for a service along with the defaults.  Helpful if you are just curious
    what things look like.  For a more Hadrian-ish way to export configurations,
    see ExportConfigs.py
    """

    api = ApiResource('<cloudera manager server>', 7180, '<username>',
                      '<password>')
    cluster = api.get_cluster('CM')
    service = cluster.get_service('<service name>')

    for i in service.get_all_role_config_groups():
        print '--------------------------------------------------------'
        print i.name
        print '--------------------------------------------------------'
        for k, v in i.get_config('full').iteritems():
            if v.value is None:
                print k + ' - default - ' + str(v.default)
            else:
                print k + ' - ' + str(v.value)
Example #23
def main():

   """
   This is an example script for printing the default configurations for a CM service.
   It's rough, but it gets the job done.  This is how you can see all of the settings
   you've made for a service along with the defaults.  Helpful if you are just curious
   what things look like.  For a more Hadrian-ish way to export configurations,
   see ExportConfigs.py
   """

   api = ApiResource('<cloudera manager server>', 7180, '<username>', '<password>')
   cluster = api.get_cluster('CM')
   service = cluster.get_service('<service name>')
   
   for i in service.get_all_role_config_groups():
      print '--------------------------------------------------------'
      print i.name
      print '--------------------------------------------------------'
      for k,v in i.get_config('full').iteritems():
          if v.value is None:  
             print k + ' - default - ' + str(v.default)
          else: 
             print k + ' - ' + str(v.value)
Example #24
def main():
    """
    Add peer to the cluster.
    @rtype:   number
    @returns: A number representing the status of success.
    """
    settings = parse_args()
    if len(sys.argv) == 1 or len(sys.argv) > 17:
        print_usage_message()
        quit(1)
    api = ApiResource(settings.server, settings.port, settings.username,
                      settings.password, settings.use_tls, 14)
    target_hdfs_name = get_service_name('HDFS', api,
                                        settings.target_cluster_name)
    hdfs = api.get_cluster(
        settings.target_cluster_name).get_service(target_hdfs_name)
    cmd = hdfs.trigger_replication_schedule(settings.schedule_id)
    cmd.wait()
    result = hdfs.get_replication_schedule(settings.schedule_id).history[0]
    hdfsresult = hdfs.get_replication_schedule(
        settings.schedule_id).history[0].hdfsResult
    if result.success is False:
        print "######  Replication job failed  #####"
        print "Yarn Job ID :" + str(hdfsresult.jobId)
        print "Job Details URL:" + str(hdfsresult.jobDetailsUri)
        print "Setup Error:" + str(hdfsresult.setupError)
    else:
        print "######  Replication job succeeded  #####"
        print "Yarn Job ID :" + str(hdfsresult.jobId)
        print "Job Details URL:" + str(hdfsresult.jobDetailsUri)
        print "numFilesCopied:" + str(hdfsresult.numFilesCopied)
        print "numBytesCopied:" + str(hdfsresult.numBytesCopied)
        print "numFilesSkipped:" + str(hdfsresult.numFilesSkipped)
        print "numBytesSkipped:" + str(hdfsresult.numBytesSkipped)

    return 0
#hosts.append("master")
#hosts.append("w01")
#hosts.append("w02")
#hosts.append("w03")
hosts.append("ip-10-11-167-80")
hosts.append("ip-10-153-224-197")
hosts.append("ip-10-37-166-245")
hosts.append("ip-10-169-69-118")
cluster.add_hosts(hosts)

# Downloads and distributes parcels

# Had to recreate the cluster object as follows. For some reason doing a cluster.get_parcel was
# failing while the cluster object was api.create_cluster() 

cluster = api.get_cluster("cloudera-pe-test")
#parcel = cluster.get_parcel("CDH", "5.2.0-1.cdh5.2.0.p0.36")
parcel = cluster.get_parcel("CDH", "5.2.0-1.cdh5.2.0.p0.36")
parcel.start_download()
while True:
    parcel = cluster.get_parcel("CDH", "5.2.0-1.cdh5.2.0.p0.36")
    if parcel.stage != "DOWNLOADED":
        print "Downloading : %s / %s" % (parcel.state.progress, parcel.state.totalProgress)
    else:
        break

parcel.start_distribution()
while True:
    parcel = cluster.get_parcel("CDH", "5.2.0-1.cdh5.2.0.p0.36")
    if parcel.stage != "DISTRIBUTED":
        print "Distributing: %s / %s" % (parcel.state.progress, parcel.state.totalProgress)
    else:
        break
Example #26
class ClouderaManager(object):
    """
    The complete orchestration of a cluster from start to finish assuming all the hosts are
    configured and Cloudera Manager is installed with all the required databases setup.

    Handle all the steps required in creating a cluster. All the functions are built to be
    idempotent, so you should be able to resume from any failed step by re-running
    __class__.setup().
    """

    def __init__(self, module, config, trial=False, license_txt=None):
        self.api = ApiResource(config['cm']['host'], username=config['cm']['username'],
                               password=config['cm']['password'])
        self.manager = self.api.get_cloudera_manager()
        self.config = config
        self.module = module
        self.trial = trial
        self.license_txt = license_txt
        self.cluster = None

    def enable_license(self):
        """
        Enable the requested license, either it's trial mode or a full license is entered and
        registered.
        """
        try:
            _license = self.manager.get_license()
        except ApiException:
            print_json(type="LICENSE", msg="Enabling license")
            if self.trial:
                self.manager.begin_trial()
            else:
                if self.license_txt is not None:
                    self.manager.update_license(self.license_txt)
                else:
                    fail(self.module, 'License should be provided or trial should be specified')

            try:
                _license = self.manager.get_license()
            except ApiException:
                fail(self.module, 'Failed enabling license')
        print_json(type="LICENSE",
                   msg="Owner: {}, UUID: {}".format(_license.owner, _license.uuid))

    def create_cluster(self):
        """
        Create a cluster and add hosts to the cluster. A new cluster is only created
        if another one doesn't exist with the same name.
        """
        print_json(type="CLUSTER", msg="Creating cluster")
        cluster_config = self.config['cluster']
        try:
            self.cluster = self.api.get_cluster(cluster_config['name'])
        except ApiException:
            print_json(type="CLUSTER",
                       msg="Creating Cluster entity: {}".format(cluster_config['name']))
            self.cluster = self.api.create_cluster(cluster_config['name'],
                                                   cluster_config['version'],
                                                   cluster_config['fullVersion'])

        cluster_hosts = [self.api.get_host(host.hostId).hostname
                         for host in self.cluster.list_hosts()]
        hosts = []
        for host in cluster_config['hosts']:
            if host not in cluster_hosts:
                hosts.append(host)
        self.cluster.add_hosts(hosts)

    def activate_parcels(self):
        print_json(type="PARCELS", msg="Setting up parcels")
        for parcel_cfg in self.config['parcels']:
            parcel = Parcels(self.module, self.manager, self.cluster,
                             parcel_cfg.get('version'), parcel_cfg.get('repo'),
                             parcel_cfg.get('product', 'CDH'))
            parcel.download()
            parcel.distribute()
            parcel.activate()

    @retry(attempts=20, delay=5)
    def wait_inspect_hosts(self, cmd):
        """
        Inspect all the hosts. Basically wait till the check completes on all hosts.

        :param cmd: A command instance used for tracking the status of the command
        """
        print_json(type="HOSTS", msg="Inspecting hosts")
        cmd = cmd.fetch()
        if cmd.success is None:
            raise ApiException("Waiting on command {} to finish".format(cmd))
        elif not cmd.success:
            if (cmd.resultMessage is not None and
                    'is not currently available for execution' in cmd.resultMessage):
                raise ApiException('Retry Command')
            fail(self.module, 'Host inspection failed')
        print_json(type="HOSTS", msg="Host inspection completed: {}".format(cmd.resultMessage))

    def deploy_mgmt_services(self):
        """
        Configure, deploy and start all the Cloudera Management Services.
        """
        print_json(type="MGMT", msg="Deploying Management Services")
        try:
            mgmt = self.manager.get_service()
            if mgmt.serviceState == 'STARTED':
                return
        except ApiException:
            print_json(type="MGMT", msg="Management Services don't exist. Creating.")
            mgmt = self.manager.create_mgmt_service(ApiServiceSetupInfo())

        for role in self.config['services']['MGMT']['roles']:
            if not len(mgmt.get_roles_by_type(role['group'])) > 0:
                print_json(type="MGMT", msg="Creating role for {}".format(role['group']))
                mgmt.create_role('{}-1'.format(role['group']), role['group'], role['hosts'][0])

        for role in self.config['services']['MGMT']['roles']:
            role_group = mgmt.get_role_config_group('mgmt-{}-BASE'.format(role['group']))
            role_group.update_config(role.get('config', {}))

        mgmt.start().wait()
        if self.manager.get_service().serviceState == 'STARTED':
            print_json(type="MGMT", msg="Management Services started")
        else:
            fail(self.module, "[MGMT] Cloudera Management services didn't start up properly")

    def service_orchestrate(self, services):
        """
        Create, pre-configure provided list of services
        Stop/Start those services
        Perform and post service startup actions

        :param services: List of Services to perform service specific actions
        """
        service_classes = []

        # Create and pre-configure provided services
        for service in services:
            service_config = self.config['services'].get(service.upper())
            if service_config:
                svc = getattr(sys.modules[__name__], service)(self.cluster, service_config)
                if not svc.started:
                    svc.deploy()
                    svc.pre_start()
                service_classes.append(svc)

        print_json(type="CLUSTER", msg="Starting services: {} on Cluster".format(services))

        # Deploy all the client configs, since some of the services depend on other services
        # and it is essential that the client configs are in place
        self.cluster.deploy_client_config()

        # Start each service and run the post_start actions for each service
        for svc in service_classes:
            # Only go thru the steps if the service is not yet started. This helps with
            # re-running the script after fixing errors
            if not svc.started:
                svc.start()
                svc.post_start()

    def setup(self):
        # TODO(rnirmal): Cloudera Manager SSL?

        # Enable a full license or start a trial
        self.enable_license()

        # Create the cluster entity and associate hosts
        self.create_cluster()

        # Download and activate the parcels
        self.activate_parcels()

        # Inspect all the hosts
        self.wait_inspect_hosts(self.manager.inspect_hosts())

        # Create Management services
        self.deploy_mgmt_services()

        # Configure and Start base services
        self.service_orchestrate(BASE_SERVICES)

        # Configure and Start remaining services
        self.service_orchestrate(ADDITIONAL_SERVICES)
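The @retry decorator applied to wait_inspect_hosts above is not included in the snippet. A minimal sketch, assuming it simply re-invokes the wrapped method while it raises ApiException; the attempts/delay parameters are taken from the usage above:

def retry(attempts=3, delay=5):
    # Hypothetical decorator: retry the wrapped call on ApiException, sleeping between tries.
    def decorator(func):
        def wrapper(*args, **kwargs):
            for attempt in range(attempts):
                try:
                    return func(*args, **kwargs)
                except ApiException:
                    if attempt == attempts - 1:
                        raise
                    time.sleep(delay)
        return wrapper
    return decorator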
Example #27
#!/usr/bin/env python
import sys
from cm_api.api_client import ApiResource

CMD_TIMEOUT = 180

manager_host = sys.argv[1]
cluster_name = sys.argv[2]
action = sys.argv[3]

api = ApiResource(manager_host,
                  username="******",
                  password="******",
                  use_tls=False,
                  version=4)
cluster = api.get_cluster(cluster_name)

if action == "format":
    hdfs = cluster.get_service("hdfs1")
    cmd = hdfs.format_hdfs("hdfs1_NAMENODE_1")[0]
    if not cmd.wait(CMD_TIMEOUT).success:
        raise Exception("Failed to format HDFS")
elif action == "start":
    service_name = sys.argv[4]
    service = cluster.get_service(service_name)
    if service_name == "hdfs1" or service_name == "mapreduce1":  ## TODO! Refactoring is needed
        service_config = {}
        if service_name == "hdfs1":
            # TODO: HACK!! disable dfs permissions
            service_config = {'dfs_permissions': False}
        if service_name == "mapreduce1":
Example #28
#!/usr/bin/env python

import sys
import socket
from cm_api.api_client import ApiResource
from cm_api.api_client import ApiException

CMD_TIMEOUT = 180
api = ApiResource(sys.argv[1],
                  username="******",
                  password="******",
                  use_tls=False,
                  version=4)
cluster = api.get_cluster(sys.argv[2])

hdfs = cluster.get_service('hdfs1')
hdfs_service_config = {'dfs_block_local_path_access_user': '******'}
hdfs_roles_names = []
roles_types = hdfs.get_role_types()
for role_type in roles_types:
    roles = hdfs.get_roles_by_type(role_type)
    for role in roles:
        hdfs_roles_names.append(role.name)

hdfs.update_config(svc_config=hdfs_service_config)

cmd_hdfs = hdfs.deploy_client_config(*hdfs_roles_names)

if not cmd_hdfs.wait(CMD_TIMEOUT).success:
    raise Exception("Failed to deploy HDFS client configuration")
def status(host, user, passw):
    cm_host = str(host)
    api = ApiResource(cm_host,
                      7180,
                      username=str(user),
                      password=str(passw),
                      version=9)

    # Get a list of all clusters
    cluster = api.get_cluster("Cloudera QuickStart")
    service_list = cluster.get_all_services()
    for service in service_list:
        if service.type == "OOZIE":
            print("===================================================")
            print("Entered Oozie")
            oozie_service = service
            print("Located Oozie Service: " + service.name)
            print("State:  " + oozie_service.serviceState + "\n" +
                  "Health:  " + oozie_service.healthSummary)

        if service.type == "KS_INDEXER":
            print("===================================================")
            print("Entered ks_indexer")
            ks_indexer_service = service
            print("Located ks_indexer Service: " + service.name)
            print("State:  " + ks_indexer_service.serviceState + "\n" +
                  "Health:  " + ks_indexer_service.healthSummary)

        if service.type == "SQOOP":
            print("===================================================")
            print("Entered sqoop")
            sqoop_service = service
            print("Located sqoop Service: " + service.name)
            print("State:  " + sqoop_service.serviceState + "\n" +
                  "Health:  " + sqoop_service.healthSummary)

        if service.type == "ZOOKEEPER":
            print("===================================================")
            print("Entered zookeeper")
            zookeeper_service = service
            print("Located zookeeper Service: " + service.name)
            print("State:  " + zookeeper_service.serviceState + "\n" +
                  "Health:  " + zookeeper_service.healthSummary)

        if service.type == "HUE":
            print("===================================================")
            print("Entered hue")
            hue_service = service
            print("Located hue Service: " + service.name)
            print("State:  " + hue_service.serviceState + "\n" + "Health:  " +
                  hue_service.healthSummary)

        if service.type == "FLUME":
            print("===================================================")
            print("Entered flume")
            flume_service = service
            print("Located flume Service: " + service.name)
            print("State:  " + flume_service.serviceState + "\n" +
                  "Health:  " + flume_service.healthSummary)

        if service.type == "IMPALA":
            print("===================================================")
            print("Entered impala")
            impala_service = service
            print("Located impala Service: " + service.name)
            print("State:  " + impala_service.serviceState + "\n" +
                  "Health:  " + impala_service.healthSummary)

        if service.type == "HDFS":
            print("===================================================")
            print("Entered hdfs")
            hdfs_service = service
            print("Located hdfs Service: " + service.name)
            print("State:  " + hdfs_service.serviceState + "\n" + "Health:  " +
                  hdfs_service.healthSummary)

        if service.type == "SOLR":
            print("===================================================")
            print("Entered solr")
            solr_service = service
            print("Located solr Service: " + service.name)
            print("State:  " + solr_service.serviceState + "\n" + "Health:  " +
                  solr_service.healthSummary)

        if service.type == "HBASE":
            print("===================================================")
            print("Entered hbase")
            hbase_service = service
            print("Located hbase Service: " + service.name)
            print("State:  " + hbase_service.serviceState + "\n" +
                  "Health:  " + hbase_service.healthSummary)

        if service.type == "YARN":
            print("===================================================")
            print("Entered yarn")
            yarn_service = service
            print("Located yarn Service: " + service.name)
            print("State:  " + yarn_service.serviceState + "\n" + "Health:  " +
                  yarn_service.healthSummary)

        if service.type == "HIVE":
            print("===================================================")
            print("Entered hive")
            hive_service = service
            print("Located hive Service: " + service.name)
            print("State:  " + hive_service.serviceState + "\n" + "Health:  " +
                  hive_service.healthSummary)

        if service.type == "SPARK":
            print("===================================================")
            print("Entered spark")
            spark_service = service
            print("Located spark Service: " + service.name)
            print("State:  " + spark_service.serviceState + "\n" +
                  "Health:  " + spark_service.healthSummary)

        if service.type == "SENTRY":
            print("===================================================")
            print("Entered sentry")
            sentry_service = service
            print("Located sentry Service: " + service.name)
            print("State:  " + sentry_service.serviceState + "\n" +
                  "Health:  " + sentry_service.healthSummary)
Example #30
#!/usr/bin/env python

import socket
import time
from cm_api.api_client import ApiResource
from cm_api.endpoints.services import ApiService
from cm_api.endpoints.services import ApiServiceSetupInfo

cm_host = 'ip-10-136-86-133'
api = ApiResource(cm_host, username='******', password='******')

cluster = api.get_cluster('cloudera-pe-test')

### HBase ###
hbase_service_name = "HBASE"
hbase_service_config = {
  'hdfs_service': 'hdfs01',
  'zookeeper_service': 'zookeeper01',
}
hbase_hm_host = "ip-10-136-86-133"
hbase_hm_config = { }
hbase_rs_hosts = [ ]
hbase_rs_hosts.append("ip-10-153-224-197")
hbase_rs_hosts.append("ip-10-169-69-118")
hbase_rs_config = {
  'hbase_hregion_memstore_flush_size': 1024000000,
  'hbase_regionserver_handler_count': 10,
  'hbase_regionserver_java_heapsize': 2048000000,
  'hbase_regionserver_java_opts': '',
}
hbase_thriftserver_service_name = "HBASETHRIFTSERVER"
Example #31
#
#  else :
#   print >>sys.stderr, 'Cannot replicate from that cluster!'
#    return -1

  vm_version = cf['CM_VERSION']
  API = ApiResource(cmHost, cf['CM_PORT'],  version=cf['CM_VERSION'], username=cf['CM_USER'], password=cf['CM_PASSWD'], use_tls=False)
  LOG.debug('Connected to CM host on ' + cmHost)

  procUser = getUsername()
  LOG.debug('Process effective username is ' + procUser)
  procGroup= getGroupname()
  LOG.debug('Process effective group name is ' + procGroup)
  procUserGroups = getUserGroups(procUser)
  LOG.debug('All groups for user ' + procUser + ': ' + ', '.join(procUserGroups))
  cluster = API.get_cluster(cf['CLUSTER_NAME'])

  if action == 'listRepls':
    print >>sys.stdout, '\n\tSearching replication schedules for user: ' + procUser + ' group(s): ' + ', '.join(procUserGroups)
    schedules = getAccessableSchedules(cf,cluster,procUser,procUserGroups)
    printReplicationSchedules(cf,schedules)
    return cf['RET_OK']

# get details about the replication the user is interested in
  if service == cf['HIVE_SERVICE']:
    path = getDatabaseLocation(cf,database)
    LOG.debug('DB location is ' + path)
    schedule = getHiveSchedule (cluster,service,database,table)
  else:
    schedule = getHdfsSchedule (cluster,service,path)
    path = schedule.hdfsArguments.sourcePath
Example #32
def main():
  module = AnsibleModule(argument_spec=dict((argument, {'type': 'str'}) for argument in MODULE_ARGUMENTS))

  api = ApiResource('localhost', username=ADMIN_USER, password=ADMIN_PASS, version=10)
  cluster_name = CLUSTER_NAME

  manager = api.get_cloudera_manager()

  action_a = module.params.get('action', None)

  if action_a == 'create_cluster':
    license_a = module.params.get('license', None)
    version_a = module.params.get('version', None)

    cluster_list = [x.name for x in api.get_all_clusters()]
    if cluster_name in cluster_list:
      module.exit_json(changed=False, msg='Cluster exists')
    else:
      cluster = api.create_cluster(CLUSTER_NAME, fullVersion=version_a)
      if license_a == None:
        manager.begin_trial()
      else:
        manager.update_license(license_a.decode('base64'))
      module.exit_json(changed=True, msg='Cluster created')
  elif action_a in ['add_host', 'create_mgmt', 'deploy_parcel', 'deploy_hdfs_base', 'deploy_hdfs_httpfs', 'deploy_hdfs_dn', 'deploy_hdfs_ha', 'deploy_rm_ha', 'set_config', 'service', 'deploy_service', 'deploy_service_worker_nodes', 'deploy_base_roles', 'run_command', 'cluster','create_snapshot_policy']:
    # more complicated actions that need a created cluster go here
    cluster = api.get_cluster(cluster_name)
    host_map = dict((api.get_host(x.hostId).hostname, x.hostId) for x in cluster.list_hosts())

    # adds a host to the cluster
    # host_name should be in the internal DNS format, ip-xx-xx-xx.compute.internal
    if action_a == 'add_host':
      host_a = module.params.get('host', None)

      host_list = host_map.keys()
      if host_a in host_list:
        module.exit_json(changed=False, msg='Host already in cluster')
      else:
        try:
          cluster.add_hosts([host_a])
        except ApiException:
          # if a host isn't there, it could be because the agent didn't manage to connect yet
          # so let's wait a moment for it
          sleep(120)
          cluster.add_hosts([host_a])

        module.exit_json(changed=True, msg='Host added')

    # create management service and set it's basic configuration
    # this needs a separate function since management is handled
    # differently than the rest of services
    elif action_a == 'create_mgmt':
      host_a = module.params.get('host', None)

      # getting the management service is the only way to check if mgmt exists
      # an exception means there isn't one
      try:
        mgmt = manager.get_service()
        module.exit_json(changed=False, msg='Mgmt service already exists')
      except ApiException:
        pass

      mgmt = manager.create_mgmt_service(ApiServiceSetupInfo())

      # this is ugly... and I see no good way to unuglify it
      firehose_passwd = Popen("sudo grep com.cloudera.cmf.ACTIVITYMONITOR.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")
      reports_passwd = Popen("sudo grep com.cloudera.cmf.REPORTSMANAGER.db.password /etc/cloudera-scm-server/db.mgmt.properties | awk -F'=' '{print $2}'", shell=True, stdout=PIPE).stdout.read().rstrip("\n")

      # since there is no easy way of configuring the manager... let's do it here :(
      role_conf = defaultdict(dict)
      role_conf['ACTIVITYMONITOR'] = {
          'firehose_database_host': '{0}:7432'.format(host_a),
          'firehose_database_user': '******',
          'firehose_database_password': firehose_passwd,
          'firehose_database_type': 'postgresql',
          'firehose_database_name': 'amon',
          'firehose_heapsize': '268435456',
      }
      role_conf['EVENTSERVER'] = {
          'event_server_heapsize': '215964392'
      }
      role_conf['REPORTSMANAGER'] = {
          'headlamp_database_host': '{0}:7432'.format(host_a),
          'headlamp_database_user': '******',
          'headlamp_database_password': reports_passwd,
          'headlamp_database_type': 'postgresql',
          'headlamp_database_name': 'rman',
          'headlamp_heapsize': '215964392',
      }

      roles = ['ACTIVITYMONITOR', 'ALERTPUBLISHER', 'EVENTSERVER', 'HOSTMONITOR', 'SERVICEMONITOR', 'REPORTSMANAGER']
      # create mangement roles
      for role in roles:
        mgmt.create_role('{0}-1'.format(role), role, host_map[host_a])

      # update configuration of each
      for group in mgmt.get_all_role_config_groups():
        group.update_config(role_conf[group.roleType])

      mgmt.start().wait()
      # after starting this service needs time to spin up
      sleep(30)
      module.exit_json(changed=True, msg='Mgmt created and started')

    # deploy a given parcel on all hosts in the cluster
    # you can specify a substring of the version ending with latest, for example 5.3-latest instead of 5.3.5-1.cdh5.3.5.p0.4
    elif action_a == 'deploy_parcel':
      name_a = module.params.get('name', None)
      version_a = module.params.get('version', None)

      if "latest" in version_a:
        available_versions = [x.version for x in cluster.get_all_parcels() if x.product == name_a]
        if "-latest" in version_a:
          version_substr = match('(.+?)-latest', version_a).group(1)
        # if version is just "latest", try to check everything
        else:
          version_substr = ".*"
        try:
          [version_parcel] = [x for x in available_versions if re.match(version_substr, x) != None]
        except ValueError:
          module.fail_json(msg='Specified version {0} doesnt appear in {1} or appears twice'.format(version_substr, available_versions))
      else:
        version_parcel = version_a

      # we now go through various stages of getting the parcel
      # as there is no built-in way of waiting for an operation to complete
      # we use loops with sleep to get it done
      parcel = cluster.get_parcel(name_a, version_parcel)
      if parcel.stage == 'AVAILABLE_REMOTELY':
        parcel.start_download()

        while parcel.stage != 'DOWNLOADED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
          sleep(10)

      if parcel.stage == 'DOWNLOADED':
        parcel.start_distribution()

        while parcel.stage != 'DISTRIBUTED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
          # sleep while hosts report problems after the download
          for i in range(12):
            sleep(10)
            if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
              break

      # since parcels are distributed automatically when a new host is added to a cluster
      # we can encounter the ,,ACTIVATING'' stage then
      if parcel.stage == 'DISTRIBUTED' or parcel.stage == 'ACTIVATING':
        if parcel.stage == 'DISTRIBUTED':
          parcel.activate()

        while parcel.stage != 'ACTIVATED':
          parcel = cluster.get_parcel(name_a, version_parcel)
          # this sleep has to be large because although the operation is very fast
          # it makes the management and cloudera hosts go bonkers, failing all of the health checks
          sleep(10)

        # sleep while hosts report problems after the distribution
        for i in range(60):
          sleep(10)
          if sum([1 for x in api.get_all_hosts(view='Full') if x.healthSummary != 'GOOD']) == 0:
            break

        module.exit_json(changed=True, msg='Parcel activated')

      if parcel.stage == 'ACTIVATED':
        module.exit_json(changed=False, msg='Parcel already activated')

      # if we get down here, something is not right
      module.fail_json(msg='Invalid parcel state')

    # deploy nodes for workers, according to SERVICE_WORKER_MAP
    # also give them sane names and init zookeeper and kafka ones
    # which need id's specified
    elif action_a == 'deploy_service_worker_nodes':
      host_a = module.params.get('host', None)
      service_a = module.params.get('service', None)

      service_name = SERVICE_MAP[service_a]
      role_name = SERVICE_WORKER_MAP[service_a]['name']
      full_role_name = SERVICE_WORKER_MAP[service_a]['formatstring']

      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
      else:
        service = cluster.get_service(service_name)

      nodes = [x for x in service.get_all_roles() if role_name in x.name]

      # if host already has the given group, we should skip it
      if host_map[host_a] in [x.hostRef.hostId for x in nodes]:
        module.exit_json(changed=False, msg='Host already is a {0}'.format(role_name))
      # find out the highest id that currently exists
      else:
        node_names = [x.name for x in nodes]
        if len(node_names) == 0:
          # if no nodes, start numbering from 1
          node_i = 1
        else:
          # take the max number and add 1 to it
          node_i = max([int(x.split('-')[-1]) for x in node_names]) + 1

        if service_name == 'ZOOKEEPER':
          role = service.create_role(full_role_name.format(node_i), 'SERVER', host_a)
          # zookeeper needs a per-node ID in the configuration, so we set it now
          role.update_config({'serverId': node_i})
        elif service_name == 'KAFKA':
          role = service.create_role(full_role_name.format(node_i), role_name, host_a)
          # kafka needs a per-node ID in the configuration, so we set it now
          role.update_config({'broker.id': node_i})
        else:
          service.create_role(full_role_name.format(node_i), role_name, host_a)

        module.exit_json(changed=True, msg='Added host to {0} role'.format(role_name))

    # deploy a service. just create it, don't do anything more
    # this is needed mainly when we have to set service properties before role deployment
    elif action_a == 'deploy_service':
      name_a = module.params.get('name', None)

      if not name_a in SERVICE_MAP:
        module.fail_json(msg='Unknown service: {0}'.format(name_a))
      service_name = SERVICE_MAP[name_a]
      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
        module.exit_json(changed=True, msg='{0} service created'.format(service_name))
      else:
        module.exit_json(changed=False, msg='{0} service already exists'.format(service_name))

    # deploy the base hdfs roles (the namenode and secondary)
    # this doesn't create the service, as at least one datanode should already be added!
    # the format also requires certain properties to be set before we run it
    elif action_a == 'deploy_hdfs_base':
      nn_host_a = module.params.get('nn_host', None)
      sn_host_a = module.params.get('sn_host', None)

      changed = False

      hdfs = cluster.get_service('HDFS')
      hdfs_roles = [x.name for x in hdfs.get_all_roles()]

      # don't create a secondary namenode when:
      #- there is one that already exists
      #- there is a second namenode, which means we have HA and don't need a secondary
      if not 'HDFS-SECONDARYNAMENODE' in hdfs_roles and not 'HDFS-NAMENODE-2' in hdfs_roles:
        hdfs.create_role('HDFS-SECONDARYNAMENODE', 'SECONDARYNAMENODE', sn_host_a)
        changed = True

      # create a namenode and format its filesystem
      # formatting the namenode requires at least one datanode and a secondary namenode already in the cluster!
      if not 'HDFS-NAMENODE' in hdfs_roles:
        hdfs.create_role('HDFS-NAMENODE', 'NAMENODE', nn_host_a)
        for command in hdfs.format_hdfs('HDFS-NAMENODE'):
          if command.wait().success == False:
            module.fail_json(msg='Failed formatting HDFS namenode with error: {0}'.format(command.resultMessage))
        changed = True

      module.exit_json(changed=changed, msg='Created HDFS service & NN roles')

    # enable HttpFS for HDFS
    # Hue requires this to support HA in HDFS
    elif action_a == 'deploy_hdfs_httpfs':
      host_a = module.params.get('host', None)
      
      hdfs = cluster.get_service('HDFS')
      hdfs_roles = [x.name for x in hdfs.get_all_roles()]
      
      # don't install a second instance of HttpFS
      if len([role for role in hdfs_roles if 'HDFS-HTTPFS' in role]) != 0:
        module.exit_json(changed=False, msg='HDFS HttpFS service already exists')
       
      hdfs.create_role('HDFS-HTTPFS-1', 'HTTPFS', host_map[host_a]) 
        
      module.exit_json(changed=True, msg='HDFS HttpFS service created')
      
    # enable HA for HDFS
    # this deletes the secondary namenode and creates a second namenode in its place
    # also, this spawns 3 journalnode roles and 2 failover controller roles
    elif action_a == 'deploy_hdfs_ha':
      sn_host_a = module.params.get('sn_host', None)
      jn_names_a = [module.params.get('jn1_host', None), module.params.get('jn2_host', None), module.params.get('jn3_host', None)]

      hdfs = cluster.get_service('HDFS')

      # if there's a second namenode, this means we already have HA enabled
      if not 'HDFS-NAMENODE-2' in [x.name for x in hdfs.get_all_roles()]:
        # this is bad and I should feel bad
        # jns is a list of dictionaries, each dict passes the required journalnode parameters
        jns = [{'jnHostId': host_map[jn_name], 'jnEditsDir': '/data0/hadoop/journal', 'jnName': 'HDFS-JOURNALNODE-{0}'.format(i + 1)} for i, jn_name in enumerate(jn_names_a)]

        # this call is so long because we set some predictable names for the services
        command = hdfs.enable_nn_ha('HDFS-NAMENODE', host_map[sn_host_a], 'nameservice1', jns, zk_service_name='ZOOKEEPER',
                                    active_fc_name='HDFS-FAILOVERCONTROLLER-1', standby_fc_name='HDFS-FAILOVERCONTROLLER-2', standby_name='HDFS-NAMENODE-2')

        children = command.wait().children
        for command_children in children:
          # The format command is expected to fail, since we already formatted the namenode
          if command_children.name != 'Format' and command_children.success == False:
            module.fail_json(msg='Command {0} failed when enabling HDFS HA with error {1}'.format(command_children.name, command_children.resultMessage))
        module.exit_json(changed=True, msg='Enabled HA for HDFS service')
      else:
        module.exit_json(changed=False, msg='HDFS HA already enabled')
    # enable HA for YARN
    elif action_a == 'deploy_rm_ha':
      sn_host_a = module.params.get('sn_host', None)

      yarn = cluster.get_service('YARN')

      # if there is only one role matching this name, HA for YARN is not enabled yet
      if len([0 for x in yarn.get_all_roles() if match('^YARN-RESOURCEMANAGER.*$', x.name) != None]) == 1:
        command = yarn.enable_rm_ha(sn_host_a, zk_service_name='ZOOKEEPER')
        children = command.wait().children
        for command_children in children:
          if command_children.success == False:
            module.fail_json(msg='Command {0} failed when enabling YARN HA with error {1}'.format(command_children.name, command_children.resultMessage))
        module.exit_json(changed=True, msg='Enabled HA for YARN service')
      else:
        module.exit_json(changed=False, msg='YARN HA already enabled')

    # deploy the base roles for a service, according to BASE_SERVICE_ROLE_MAP
    # after the deployment, run the init commands specified in SERVICE_INIT_COMMANDS
    elif action_a == 'deploy_base_roles':
      host_a = module.params.get('host', None)
      service_a = module.params.get('service', None)

      service_name = SERVICE_MAP[service_a]
      changed = False

      if not service_name in [x.name for x in cluster.get_all_services()]:
        service = cluster.create_service(service_name, service_name)
      else:
        service = cluster.get_service(service_name)

      service_roles = [x.name for x in service.get_all_roles()]

      # create each service from the map
      for (role_name, cloudera_name) in BASE_SERVICE_ROLE_MAP[service_a].items():
        # check if the role already exists; the script can't compare names directly because
        # after enabling HA on YARN, roles will have random strings in their names
        if len([0 for x in service_roles if match(role_name, x) != None]) == 0:
          service.create_role(role_name, cloudera_name, host_a)
          changed = True

          # init commands
          if role_name in SERVICE_INIT_COMMANDS.keys():
            for command_to_run in SERVICE_INIT_COMMANDS[role_name]:
              # different handling of commands specified by name and
              # ones specified by an instance method
              if ismethod(command_to_run):
                command = command_to_run(service)
              else:
                command = service.service_command_by_name(command_to_run)

              if command.wait().success == False:
                module.fail_json(msg='Running {0} failed with {1}'.format(command_to_run, command.resultMessage))

      if changed == True:
        module.exit_json(changed=True, msg='Created base roles for {0}'.format(service_name))
      else:
        module.exit_json(changed=False, msg='{0} base roles already exist'.format(service_name))

    # set config values for a given service/role
    elif action_a == 'set_config':
      entity_a = module.params.get('entity', None)
      service_a = module.params.get('service', None)
      role_a = module.params.get('role', None)
      name_a = module.params.get('name', None)
      value_a = module.params.get('value', None)

      if not service_a in SERVICE_MAP:
        module.fail_json(msg='Unknown service: {0}'.format(service_a))

      # since management is handled differently, it needs a different service
      if service_a == 'management':
        service = manager.get_service()
      elif service_a == 'cm':
        service = manager
      else:
        service = cluster.get_service(SERVICE_MAP[service_a])

      # role and service configs are handled differently
      if entity_a == 'service':
        prev_config = service.get_config()
        curr_config = service.update_config({name_a: value_a})
        if service_a == 'cm':
          prev_config = [prev_config]
          curr_config = [curr_config]
        module.exit_json(changed=(str(prev_config[0]) != str(curr_config[0])), msg='Config value for {0}: {1}'.format(name_a, curr_config[0][name_a]))

      elif entity_a == 'role':
        if not role_a in ROLE_MAP:
          module.fail_json(msg='Unknown role: {0}'.format(role_a))

        role = service.get_role_config_group(ROLE_MAP[role_a])
        prev_config = role.get_config()
        curr_config = role.update_config({name_a: value_a})
        module.exit_json(changed=(str(prev_config) != str(curr_config)), msg='Config value for {0}: {1}'.format(name_a, curr_config[name_a]))

      else:
        module.fail_json(msg='Invalid entity, must be one of service, role')

    # handle service state
    # currently this can only start/restart a service
    elif action_a == 'service':
      state_a = module.params.get('state', None)
      service_a = module.params.get('service', None)

      try:
        if service_a == 'cm':
          service = manager.get_service()
        else:
          service = cluster.get_service(SERVICE_MAP[service_a])
      except ApiException:
        module.fail_json(msg='Service does not exist')

      # when starting a service, we also deploy the client config for it
      if state_a == 'started':
        if service.serviceState == 'STARTED':
          module.exit_json(changed=False, msg='Service already running')
        method = service.start
        verb = "start"
      elif state_a == 'restarted':
        method = service.restart
        verb = "restart"

      try:
        command = service.deploy_client_config()
        if command.wait().success == False:
          module.fail_json(msg='Deploying client config failed with {0}'.format(command.resultMessage))
      # since there is no way to check if a service handles client config deployments
      # we try our best and pass the exception if it doesn't
      except (ApiException, AttributeError):
        pass

      method().wait()
      # we need to wait for cloudera checks to complete...
      # otherwise it will report as failing
      sleep(10)
      for i in range(24):
        sleep(10)
        service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
        if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
          break
      service = manager.get_service() if service_a == "cm" else cluster.get_service(SERVICE_MAP[service_a])
      if service.serviceState == 'STARTED' and service.healthSummary == 'GOOD':
        module.exit_json(changed=True, msg='Service {0} successful'.format(verb))
      else:
        module.fail_json(msg='Service {0} failed'.format(verb))

    # handle cluster
    # currently this can only restart the cluster
    elif action_a == 'cluster':
      state_a = module.params.get('state', None)

      if state_a == 'restarted':
        command = cluster.restart(redeploy_client_configuration=True)
        if command.wait().success == False:
          module.fail_json(msg='Cluster restart failed with {0}'.format(command.resultMessage))
        else:
          module.exit_json(changed=True, msg='Cluster restart successful')

    # Snapshot policy
    # only create is supported
    elif action_a == 'create_snapshot_policy':
      name_a = module.params.get('name', None)
      value_a = module.params.get('value', None)
      service_a = module.params.get('service', None)
      service = cluster.get_service(SERVICE_MAP[service_a])
      payload=loads(value_a)
      # check if the policy already exists; an exception is expected when configuring for the first time.
      try: 
        test = service.get_snapshot_policy(name_a)
        module.exit_json(changed=False, msg='Defined policy already exists')
      except ApiException:
        pass
      try:
        command = service.create_snapshot_policy(payload)
        module.exit_json(changed=True, msg='Snapshot policy was created.')
      except (ApiException, AttributeError):
        module.fail_json(msg='ERROR in creating snapshot policy.')
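
# --- Sketch (not part of the original module): every parcel stage transition above repeats the
# same polling idiom. A minimal generic helper, assuming only the cm_api parcel attributes the
# module already relies on (cluster.get_parcel(), parcel.stage, parcel.state.errors), could be:
from time import sleep

def wait_for_parcel_stage(cluster, product, version, target_stage, poll_seconds=10):
    """Poll a parcel until it reaches target_stage; raise if the parcel reports errors."""
    parcel = cluster.get_parcel(product, version)
    while parcel.stage != target_stage:
        if parcel.state.errors:
            raise Exception(str(parcel.state.errors))
        sleep(poll_seconds)
        parcel = cluster.get_parcel(product, version)
    return parcel

# usage (hypothetical names): wait_for_parcel_stage(cluster, 'CDH', version_parcel, 'DOWNLOADED')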
	print """	        --- CHECK ROLES HEALTH STATUS START ---\n\n\n"""

	api = ApiResource(cm_host, 7180, user , password)
	# Get a list of all clusters
	cdh = None
	if (api == None):
		print "COnnect error"
	try:
		for c in api.get_all_clusters():
			cdh = c
		print strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))," PRESENT CLUSTER: ",cdh.name
	except:
		print strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()) )," Error get cluster"
		send_alert_mail("CONNECT_ERROR","","","");
	if cdh != None:
		for s in api.get_cluster(cdh.name).get_all_services():
			#print strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))," SERVICES: ",s.name
			cluster = api.get_cluster(cdh.name);
			service_mapred=cluster.get_service(s.name)
			roles=service_mapred.get_all_roles()
			for r in service_mapred.get_all_roles():
				#print strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))," Role ",r.name," is in status [ ",r.healthSummary," ]"
				#check if not GOOD and not BAD
				if (r.healthSummary != "GOOD" and r.roleState == "STARTED" and r.healthSummary != "BAD"):
					send_alert_mail(r.healthSummary,r.name,r.hostRef.hostId,s.name)
					print strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))," SERVICES: ",s.name
					print strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))," Role ",r.name," is in status [ ",r.healthSummary," ]"
	print """\n\n\n		--- CHECK ROLES HEALTH NOT GOOD STATUS END ---
===================================================================================="""	
except:
	print "Can't not connect to CDH API"
Ejemplo n.º 34
0
def set_up_cluster():
    # get a handle on the instance of CM that we have running
    api = ApiResource(cm_host, cm_port, cm_username, cm_password, version=7)

    # get the CM instance
    cm = ClouderaManager(api)

    # activate the CM trial license
    cm.begin_trial()

    # create the management service
    service_setup = ApiServiceSetupInfo(name=cm_service_name, type="MGMT")
    cm.create_mgmt_service(service_setup)

    # install hosts on this CM instance
    cmd = cm.host_install(host_username, host_list, password=host_password, cm_repo_url=cm_repo_url) 
    print "Installing hosts. This might take a while."
    while cmd.success == None:
        sleep(5)
        cmd = cmd.fetch()

    if cmd.success != True:
        print "cm_host_install failed: " + cmd.resultMessage
        exit(0)

    print "cm_host_install succeeded"

    # first auto-assign roles and auto-configure the CM service
    cm.auto_assign_roles()
    cm.auto_configure()

    # create a cluster on that instance
    cluster = create_cluster(api, cluster_name, cdh_version)

    # add all our hosts to the cluster
    cluster.add_hosts(host_list)

    cluster = api.get_cluster("Cluster 1")

    parcels_list = []
    # get and list all available parcels
    print "Available parcels:"
    for p in cluster.get_all_parcels():
        print '\t' + p.product + ' ' + p.version
        if p.version.startswith(cdh_version_number) and p.product == "CDH":
            parcels_list.append(p)

    if len(parcels_list) == 0:
        print "No " + cdh_version + " parcel found!"
        exit(0)

    cdh_parcel = parcels_list[0]
    for p in parcels_list:
        if p.version > cdh_parcel.version:
            cdh_parcel = p

    # download the parcel
    print "Starting parcel download. This might take a while."
    cmd = cdh_parcel.start_download()
    if cmd.success != True:
        print "Parcel download failed!"
        exit(0)

    # make sure the download finishes
    while cdh_parcel.stage != 'DOWNLOADED':
        sleep(5)
        cdh_parcel = get_parcel(api, cdh_parcel.product, cdh_parcel.version, cluster_name)

    print cdh_parcel.product + ' ' + cdh_parcel.version + " downloaded"

    # distribute the parcel
    print "Starting parcel distribution. This might take a while."
    cmd = cdh_parcel.start_distribution()
    if cmd.success != True:
        print "Parcel distribution failed!"
        exit(0)


    # make sure the distribution finishes
    while cdh_parcel.stage != "DISTRIBUTED":
	sleep(5)
	cdh_parcel = get_parcel(api, cdh_parcel.product, cdh_parcel.version, cluster_name)

    print cdh_parcel.product + ' ' + cdh_parcel.version + " distributed"

    # activate the parcel
    cmd = cdh_parcel.activate()
    if cmd.success != True:
        print "Parcel activation failed!"
        exit(0)

    # make sure the activation finishes
    while cdh_parcel.stage != "ACTIVATED":
	cdh_parcel = get_parcel(api, cdh_parcel.product, cdh_parcel.version, cluster_name)

    print cdh_parcel.product + ' ' + cdh_parcel.version + " activated"

    # inspect hosts and print the result
    print "Inspecting hosts. This might take a few minutes."

    cmd = cm.inspect_hosts()
    while cmd.success == None:
        sleep(5)
        cmd = cmd.fetch()

    if cmd.success != True:
        print "Host inpsection failed!"
        exit(0)

    print "Hosts successfully inspected: \n" + cmd.resultMessage

    # create all the services we want to add; we will only create one instance
    # of each
    for s in service_types_and_names.keys():
        service = cluster.create_service(service_types_and_names[s], s)

    # we will auto-assign roles; you can manually assign roles using the
    # /clusters/{clusterName}/services/{serviceName}/role endpoint or by using
    # ApiService.createRole()
    cluster.auto_assign_roles()
    cluster.auto_configure()

    # this will set up the Hive and the reports manager databases because we
    # can't auto-configure those two things
    hive = cluster.get_service(service_types_and_names["HIVE"])
    hive_config = { "hive_metastore_database_host" : hive_metastore_host, \
                    "hive_metastore_database_name" : hive_metastore_name, \
                    "hive_metastore_database_password" : hive_metastore_password, \
	    	    "hive_metastore_database_port" : hive_metastore_database_port, \
		    "hive_metastore_database_type" : hive_metastore_database_type }
    hive.update_config(hive_config)

    # start the management service
    cm_service = cm.get_service()
    cm_service.start().wait()
    
    # this will set the Reports Manager database password
    # first we find the correct role
    rm_role = None
    for r in cm.get_service().get_all_roles():
        if r.type == "REPORTSMANAGER":
            rm_role = r

    if rm_role == None:
        print "No REPORTSMANAGER role found!"
        exit(0)

    # then we get the corresponding role config group -- even though there is
    # only once instance of each CM management service, we do this just in case
    # it is not placed in the base group
    rm_role_group = rm_role.roleConfigGroupRef
    rm_rcg = get_role_config_group(api, rm_role.type, \
                rm_role_group.roleConfigGroupName, None)

    # update the appropriate fields in the config
    rm_rcg_config = { "headlamp_database_host" : reports_manager_host, \
                      "headlamp_database_name" : reports_manager_name, \
                      "headlamp_database_user" : reports_manager_username, \
                      "headlamp_database_password" : reports_manager_password, \
 		      "headlamp_database_type" : reports_manager_database_type }

    rm_rcg.update_config(rm_rcg_config)


    # restart the management service with new configs
    cm_service.restart().wait()

    # execute the first run command
    print "Excuting first run command. This might take a while."
    cmd = cluster.first_run()

    while cmd.success == None:
        sleep(5)
        cmd = cmd.fetch()

    if cmd.success != True:
        print "The first run command failed: " + cmd.resultMessage()
        exit(0)

    print "First run successfully executed. Your cluster has been set up!"
Ejemplo n.º 35
0
def main():
    """
    TODO: This probably needs some work, but you get the idea.
    An example of how to do a bulk config update to Cloudera Manager. This is helpful if you have
    a bunch of changes that you want to make but don't want to use the GUI.
    """

    parser = argparse.ArgumentParser(
        description='Cloudera Manager Bulk Config Update Script')
    parser.add_argument('-H',
                        '--host',
                        '--hostname',
                        action='store',
                        dest='hostname',
                        required=True,
                        help='CM server host')
    parser.add_argument('-p',
                        '--port',
                        action='store',
                        dest='port',
                        type=int,
                        default=7180,
                        help='example: 7180')
    parser.add_argument('-u',
                        '--user',
                        '--username',
                        action='store',
                        dest='username',
                        required=True,
                        help='example: admin')
    parser.add_argument('-c',
                        '--cluster',
                        action='store',
                        dest='cluster',
                        required=True,
                        help='example: hadrian-cluster')
    args = parser.parse_args()
    password = getpass.getpass(
        'Please enter your Cloudera Manager password: ')

    # read configuration files:
    for i in os.listdir('./conf/' + args.cluster):
        config.read('./conf/' + args.cluster + '/' + i)

    api = ApiResource(args.hostname, args.port, args.username, password)
    cluster = api.get_cluster(args.cluster)
    services = cluster.get_all_services()

    # update services with configuration file parameters
    for service in services:
        if config_grabber.has_section(service.type):
            service.update_config(svc_config=config_grabber(service.name +
                                                            '-svc-config'))
            config_groups = config_grabber(service.name)['config_groups']
            for config_group in config_groups.split(','):
                print config_group
                temp_config_group = service.get_role_config_group(config_group)
                temp_config_group.update_config(config_grabber(config_group))
        else:
            print 'unknown service: ' + service.name

    print 'Starting final client configuration deployment for all services.'
    cmd = cluster.deploy_client_config()
    if not cmd.wait(CMD_TIMEOUT).success:
        print 'Failed to deploy client configuration.'
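
# --- Sketch (not part of the original example): config_grabber is referenced above but not defined
# in this excerpt. A plausible (assumed) ConfigParser-backed helper that supports both usages shown
# above -- has_section() and being called to fetch a section as a dict for update_config() -- is:
import ConfigParser

class ConfigGrabber(object):
    """Hypothetical helper: wraps a ConfigParser and returns a section as a plain dict when called."""
    def __init__(self, parser):
        self.parser = parser

    def has_section(self, name):
        return self.parser.has_section(name)

    def __call__(self, section):
        return dict(self.parser.items(section))

# usage (hypothetical): config_grabber = ConfigGrabber(config)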
Ejemplo n.º 36
0
      usage()
      return RET_BADOPTS

# check argument compatibility
  if args:
    print >>sys.stderr, '\n\tUnknown trailing argument:', args
    usage()
    return RET_BADOPTS

  if  path == None :
    print >>sys.stderr, '\n\tPlease specify a path.'
    usage()
    return RET_BADOPTS

  API = ApiResource(cmHost, CM_PORT,  version=CM_VERSION, username=CM_USER, password=CM_PASSWD, use_tls=True)
  LOG.debug('Connected to CM host on ' + cmHost)

  procUser = getUsername()
  LOG.debug('Process effective username is ' + procUser)

  cluster = API.get_cluster(CLUSTER_NAME)


  return RET_OK

#
# The 'main' entry
#
if __name__ == '__main__':
  sys.exit(main(sys.argv))
Ejemplo n.º 37
0
from cm_api.api_client import ApiResource

CM_HOST = "127.0.0.1"
ADMIN_USER = "******"
ADMIN_PASS = "******"

API = ApiResource(CM_HOST,
                  version=14,
                  username=ADMIN_USER,
                  password=ADMIN_PASS)
MANAGER = API.get_cloudera_manager()
mgmt = MANAGER.get_service()

print "restart mgmt..."
mgmt.restart().wait()

print "TIP cluster..."
tip = API.get_cluster("TIP")
tip.restart().wait()
Ejemplo n.º 38
0
class ClouderaManager(object):
    """
    The complete orchestration of a cluster from start to finish assuming all the hosts are
    configured and Cloudera Manager is installed with all the required databases setup.

    Handle all the steps required in creating a cluster. All the functions are built to function
    idempotently, so you should be able to resume from any failed step by re-running
    __class__.setup()
    """
    def __init__(self, module, config, trial=False, license_txt=None):
        self.api = ApiResource(config['cm']['host'],
                               username=config['cm']['username'],
                               password=config['cm']['password'])
        self.manager = self.api.get_cloudera_manager()
        self.config = config
        self.module = module
        self.trial = trial
        self.license_txt = license_txt
        self.cluster = None

    def enable_license(self):
        """
        Enable the requested license, either it's trial mode or a full license is entered and
        registered.
        """
        try:
            _license = self.manager.get_license()
        except ApiException:
            print_json(type="LICENSE", msg="Enabling license")
            if self.trial:
                self.manager.begin_trial()
            else:
                if self.license_txt is not None:
                    self.manager.update_license(self.license_txt)
                else:
                    fail(
                        self.module,
                        'License should be provided or trial should be specified'
                    )

            try:
                _license = self.manager.get_license()
            except ApiException:
                fail(self.module, 'Failed enabling license')
        print_json(type="LICENSE",
                   msg="Owner: {}, UUID: {}".format(_license.owner,
                                                    _license.uuid))

    def create_cluster(self):
        """
        Create a cluster and add hosts to the cluster. A new cluster is only created
        if another one doesn't exist with the same name.
        """
        print_json(type="CLUSTER", msg="Creating cluster")
        cluster_config = self.config['cluster']
        try:
            self.cluster = self.api.get_cluster(cluster_config['name'])
        except ApiException:
            print_json(type="CLUSTER",
                       msg="Creating Cluster entity: {}".format(
                           cluster_config['name']))
            self.cluster = self.api.create_cluster(
                cluster_config['name'], cluster_config['version'],
                cluster_config['fullVersion'])

        cluster_hosts = [
            self.api.get_host(host.hostId).hostname
            for host in self.cluster.list_hosts()
        ]
        hosts = []
        for host in cluster_config['hosts']:
            if host not in cluster_hosts:
                hosts.append(host)
        self.cluster.add_hosts(hosts)

    def activate_parcels(self):
        print_json(type="PARCELS", msg="Setting up parcels")
        for parcel_cfg in self.config['parcels']:
            parcel = Parcels(self.module, self.manager, self.cluster,
                             parcel_cfg.get('version'), parcel_cfg.get('repo'),
                             parcel_cfg.get('product', 'CDH'))
            parcel.download()
            parcel.distribute()
            parcel.activate()

    @retry(attempts=20, delay=5)
    def wait_inspect_hosts(self, cmd):
        """
        Inspect all the hosts. Basically wait till the check completes on all hosts.

        :param cmd: A command instance used for tracking the status of the command
        """
        print_json(type="HOSTS", msg="Inspecting hosts")
        cmd = cmd.fetch()
        if cmd.success is None:
            raise ApiException("Waiting on command {} to finish".format(cmd))
        elif not cmd.success:
            if (cmd.resultMessage is not None
                    and 'is not currently available for execution'
                    in cmd.resultMessage):
                raise ApiException('Retry Command')
            fail(self.module, 'Host inspection failed')
        print_json(type="HOSTS",
                   msg="Host inspection completed: {}".format(
                       cmd.resultMessage))

    def deploy_mgmt_services(self):
        """
        Configure, deploy and start all the Cloudera Management Services.
        """
        print_json(type="MGMT", msg="Deploying Management Services")
        try:
            mgmt = self.manager.get_service()
            if mgmt.serviceState == 'STARTED':
                return
        except ApiException:
            print_json(type="MGMT",
                       msg="Management Services don't exist. Creating.")
            mgmt = self.manager.create_mgmt_service(ApiServiceSetupInfo())

        for role in self.config['services']['MGMT']['roles']:
            if not len(mgmt.get_roles_by_type(role['group'])) > 0:
                print_json(type="MGMT",
                           msg="Creating role for {}".format(role['group']))
                mgmt.create_role('{}-1'.format(role['group']), role['group'],
                                 role['hosts'][0])

        for role in self.config['services']['MGMT']['roles']:
            role_group = mgmt.get_role_config_group('mgmt-{}-BASE'.format(
                role['group']))
            role_group.update_config(role.get('config', {}))

        mgmt.start().wait()
        if self.manager.get_service().serviceState == 'STARTED':
            print_json(type="MGMT", msg="Management Services started")
        else:
            fail(
                self.module,
                "[MGMT] Cloudera Management services didn't start up properly")

    def service_orchestrate(self, services):
        """
        Create, pre-configure provided list of services
        Stop/Start those services
        Perform and post service startup actions

        :param services: List of Services to perform service specific actions
        """
        service_classes = []

        # Create and pre-configure provided services
        for service in services:
            service_config = self.config['services'].get(service.upper())
            if service_config:
                svc = getattr(sys.modules[__name__], service)(self.cluster,
                                                              service_config)
                if not svc.started:
                    svc.deploy()
                    svc.pre_start()
                service_classes.append(svc)

        print_json(type="CLUSTER",
                   msg="Starting services: {} on Cluster".format(services))

        # Deploy all the client configs, since some of the services depend on other services
        # and is essential that the client configs are in place
        self.cluster.deploy_client_config()

        # Start each service and run the post_start actions for each service
        for svc in service_classes:
            # Only go thru the steps if the service is not yet started. This helps with
            # re-running the script after fixing errors
            if not svc.started:
                svc.start()
                svc.post_start()

    def setup(self):
        # TODO(rnirmal): Cloudera Manager SSL?

        # Enable a full license or start a trial
        self.enable_license()

        # Create the cluster entity and associate hosts
        self.create_cluster()

        # Download and activate the parcels
        self.activate_parcels()

        # Inspect all the hosts
        self.wait_inspect_hosts(self.manager.inspect_hosts())

        # Create Management services
        self.deploy_mgmt_services()

        # Configure and Start base services
        self.service_orchestrate(BASE_SERVICES)

        # Configure and Start remaining services
        self.service_orchestrate(ADDITIONAL_SERVICES)
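
# --- Sketch (not part of the original example): the @retry(attempts=20, delay=5) decorator used on
# wait_inspect_hosts() is not shown in this excerpt. A minimal implementation with the same keyword
# arguments (an assumption about its behaviour) could look like this.
from functools import wraps
from time import sleep

def retry(attempts=3, delay=1):
    """Retry the wrapped function up to `attempts` times, sleeping `delay` seconds between tries."""
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            last_exc = None
            for _ in range(attempts):
                try:
                    return func(*args, **kwargs)
                except Exception as exc:
                    last_exc = exc
                    sleep(delay)
            raise last_exc
        return wrapper
    return decorator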
Ejemplo n.º 39
0
def main(argv):
	CM_HOST         = "localhost"
	CM_PORT         = 7180
	CM_USER         = "******"
	CM_PASSWD       = "admin"
	CMD_TIMEOUT = 180
	
	#Configurations
	HDFS_CONF = {
	}
	
	NAMENODE_CONF = {
	        'dfs_name_dir_list': '/dfs/nn',
	        'dfs_namenode_servicerpc_address': 8022,
		'namenode_java_heapsize': 154140672,
	}
	
	SECONDARY_CONF = {
	        'fs_checkpoint_dir_list': '/dfs/snn',
		'secondary_namenode_java_heapsize': 154140672,
	}
	
	DATANODE_CONF = {
	        'dfs_data_dir_list': '/data/1/dfs/dn,/data/2/dfs/dn,/data/3/dfs/dn',
	        'dfs_datanode_handler_count': 10,
	        'dfs_datanode_du_reserved': 2180395417,
	        'dfs_datanode_max_locked_memory': 983564288,
	        'datanode_java_heapsize': 286261248,
	}
	
	YARN_CONF = {
		'hdfs_service':'hdfs',
	}

	RSRCMAN_CONF = {
		'resource_manager_java_heapsize': 154140672,
		'yarn_scheduler_maximum_allocation_mb': 1513,
		'yarn_scheduler_maximum_allocation_vcores': 2,
	}

	JOBHIST_CONF = {
		'mr2_jobhistory_java_heapsize': 154140672,
	}

	NODEMAN_CONF = {
		'yarn_nodemanager_local_dirs': '/yarn/nm',
		'yarn_nodemanager_resource_cpu_vcores': 2,
		'yarn_nodemanager_resource_memory_mb': 1513,
	}

	
	
	#Parser Options
	parser = OptionParser()
	parser.set_defaults(action='')
	parser.add_option("-a", "--add", action="store_const", const="add", dest="action", help="add the list of hosts to the named cluster")
	parser.add_option("-r", "--remove", action="store_const", const="remove", dest="action", help="remove the list of hosts from the named cluster")
	parser.add_option("-d", "--deploy", action="store_const", const="deploy", dest="action", help="deploy the list of hosts as a new cluster with the given name")
	parser.add_option("--delete", action="store_const", const="delete", dest="action", help="delete the named cluster")
	parser.add_option("--name", dest="name", help="declare the cluster name to be created or to interact with")
	parser.add_option("--hosts", dest="hosts", help="comma delimited list of hosts to be added/removed")
	
	
	(opts,args) = parser.parse_args()
	CLUSTER_NAME = opts.name
	if opts.hosts and "," in opts.hosts:
		HOSTNAMES = opts.hosts.split(",")
	elif opts.hosts:
		HOSTNAMES = [opts.hosts]
	else:
		HOSTNAMES = ''
	
	ROLEHASH = []
	if HOSTNAMES:
		for host in HOSTNAMES:
			ROLEHASH.append(hashlib.md5(host).hexdigest())

	api = ApiResource(CM_HOST, CM_PORT, CM_USER, CM_PASSWD)
	
	
	#Deploy a new cluster
	if(opts.action == "deploy"):	
		#Create Cluster
		print "Creating cluster..."
		cluster = api.create_cluster(CLUSTER_NAME, "CDH5")
		cluster.add_hosts(HOSTNAMES)

		#Create HDFS Service and Roles
		print "Creating HDFS Service and Roles..."
		hdfs = cluster.create_service("hdfs", "HDFS")


		namenode        = hdfs.create_role("hdfs-NAMENODE-" + ROLEHASH[0], "NAMENODE", HOSTNAMES[0])
		secnamenode     = hdfs.create_role("hdfs-SECONDARYNAMENODE-" + ROLEHASH[0], "SECONDARYNAMENODE", HOSTNAMES[0])
		for i in range(len(HOSTNAMES)-1):
			datanode = hdfs.create_role("hdfs-DATANODE-" + ROLEHASH[i+1], "DATANODE", HOSTNAMES[i+1])
		
		#Configure HDFS
		print "Configuring HDFS..."
		hdfs.update_config(svc_config = HDFS_CONF)

		for roleGroup in hdfs.get_all_role_config_groups():
			if roleGroup.roleType == "NAMENODE":
				roleGroup.update_config(NAMENODE_CONF)
			elif roleGroup.roleType == "SECONDARYNAMENODE":
				roleGroup.update_config(SECONDARY_CONF)
			elif roleGroup.roleType == "DATANODE":
				roleGroup.update_config(DATANODE_CONF)       

		#Start HDFS
		#format_hdfs takes a list of NameNodes
		print "Formatting HDFS..."
		cmd = hdfs.format_hdfs('hdfs-NAMENODE-' + ROLEHASH[0])[0]
		if not cmd.wait(CMD_TIMEOUT).success:
			print "Failed to format HDFS"
		
		print "Starting HDFS..."
		cmd = hdfs.start()
		if not cmd.wait(CMD_TIMEOUT).success:
			raise Exception("Failed to start HDFS")

		cmd = hdfs.create_hdfs_tmp()
		if not cmd.wait(CMD_TIMEOUT).success:
			raise Exception("Failed to create HDFS /tmp")


		for role in hdfs.get_all_roles():
			cmd = hdfs.deploy_client_config(role.name)
			if not cmd.wait(CMD_TIMEOUT).success:
				raise Exception("Failed to deploy client config. Role: " + role.name)

		#Create YARN Service and Roles
		print "Creating YARN Service and Roles..."
		yarn = cluster.create_service("yarn", "YARN")

		resourceman = yarn.create_role("yarn-RESOURCEMANAGER-" + ROLEHASH[0], "RESOURCEMANAGER", HOSTNAMES[0])
		jobhist = yarn.create_role("yarn-JOBHISTORY-" + ROLEHASH[0], "JOBHISTORY", HOSTNAMES[0])
		for i in range(len(HOSTNAMES)-1):
			nodeman = yarn.create_role("yarn-NODEMANAGER-" + ROLEHASH[i+1], "NODEMANAGER", HOSTNAMES[i+1])
	
		#Configure YARN
		print "Configuring YARN..."
		yarn.update_config(svc_config = YARN_CONF)

		for roleGroup in yarn.get_all_role_config_groups():
			if roleGroup.roleType == "RESOURCEMANAGER":
				roleGroup.update_config(RSRCMAN_CONF)
			elif roleGroup.roleType == "JOBHISTORY":
				roleGroup.update_config(JOBHIST_CONF)
			elif roleGroup.roleType == "NODEMANAGER":
				roleGroup.update_config(NODEMAN_CONF)

		#Start YARN
		print "Starting YARN..."
		cmd = yarn.create_yarn_job_history_dir()
		if not cmd.wait(CMD_TIMEOUT).success:
			raise Exception("Failed to create Job History Directory")
		
		cmd = yarn.create_yarn_node_manager_remote_app_log_dir()
		if not cmd.wait(CMD_TIMEOUT).success:
			raise Exception("Failed to create NodeManager remote application log directory")
	
		cmd = yarn.start()
		if not cmd.wait(CMD_TIMEOUT).success:
			raise Exception("Failed to start YARN")
		
		for role in yarn.get_all_roles():
			cmd = yarn.deploy_client_config(role.name)
			if not cmd.wait(CMD_TIMEOUT).success:
				raise Exception("Failed to deploy client config. Role: " + role.name)

		#SUCCESS!
		print "Cluster succesfully deployed."
			
	#Add new nodes		
	elif(opts.action == "add"):
		print "Adding hosts..."
		cluster = api.get_cluster(CLUSTER_NAME);
		cluster.add_hosts(HOSTNAMES);
		
		print "Configurng HDFS Roles..."
		hdfs = cluster.get_service("hdfs")
		for i in range(len(HOSTNAMES)):
			datanode = hdfs.create_role("hdfs-DATANODE-" + ROLEHASH[i], "DATANODE", HOSTNAMES[i])
			datanode.update_config(DATANODE_CONF);
			cmds = hdfs.start_roles("hdfs-DATANODE-" + ROLEHASH[i])
			for cmd in cmds:
				if not cmd.wait(CMD_TIMEOUT).success:
					raise Exception(cmd.name)	
			cmd = hdfs.deploy_client_config("hdfs-DATANODE-" + ROLEHASH[i])	
			if not cmd.wait(CMD_TIMEOUT).success:
				raise Exception("Failed to deploy client config hdfs-DATANODE-" + ROLEHASH[i])
			
		print "Configuring YARN roles..."
		yarn = cluster.get_service("yarn")
		for i in range(len(HOSTNAMES)):
			nodeman = yarn.create_role("yarn-NODEMANAGER-" + ROLEHASH[i], "NODEMANAGER", HOSTNAMES[i])
			nodeman.update_config(NODEMAN_CONF)
			cmds = yarn.start_roles("yarn-NODEMANAGER-" + ROLEHASH[i])
			for cmd in cmds:
				if not cmd.wait(CMD_TIMEOUT).success:
					raise Exception(cmd.name)
			cmd = yarn.deploy_client_config("yarn-NODEMANAGER-" + ROLEHASH[i])
			if not cmd.wait(CMD_TIMEOUT).success:
                                raise Exception("Failed to deploy client config yarn-NODEMANAGER-" + ROLEHASH[i])

		#print "Restarting HDFS service..."
		#cmd = hdfs.restart()
		#if not cmd.wait(CMD_TIMEOUT).success:
		#	raise Exception("Failed to restart HDFS")

		#print "Restarting YARN service..."
		#cmd = yarn.restart()
		#if not cmd.wait(CMD_TIMEOUT).success:
		#	raise Exception("Failed to restart YARN")

		#SUCCESS!
		print "Nodes successfully added"
			
	#Remove nodes		
	elif(opts.action == "remove"):
		cluster = api.get_cluster(CLUSTER_NAME);
		hdfs = cluster.get_service("hdfs")
		yarn = cluster.get_service("yarn")
		
		print "Decommissioning Roles..."
		for role in ROLEHASH:
			cmd = yarn.decommission("yarn-NODEMANAGER-" + role)
			if not cmd.wait(CMD_TIMEOUT).success:
				raise Exception("Failed to decommission role yarn-NODEMANAGER" + role)
			cmd = hdfs.decommission("hdfs-DATANODE-" + role)
			if not cmd.wait(CMD_TIMEOUT).success:	
				raise Exception("Failed to decommission role hdfs-DATANODE-" + role)

		print "Deleting Nodes..."
		for role in ROLEHASH:
			hdfs.delete_role("hdfs-DATANODE-" + role)
			yarn.delete_role("yarn-NODEMANAGER-" + role)
		for hostname in HOSTNAMES:		
			cluster.remove_host(hostname);		
		
		#SUCCESS
		print "Nodes successfull removed."

	#Delete Cluster
	elif(opts.action == "delete"):
		cluster = api.get_cluster(CLUSTER_NAME);
		hdfs = cluster.get_service("hdfs")
		yarn = cluster.get_service("yarn")
		print "Stopping YARN..."
		cmd = yarn.stop()
		if not cmd.wait(CMD_TIMEOUT).success:
			raise Exception("Failed to stop YARN")
		print "Stopping HDFS..."
		cmd = hdfs.stop()
		if not cmd.wait(CMD_TIMEOUT).success:
			raise Exception("Failed to stop HDFS")
		print "Deleting Cluster..."
		api.delete_cluster(CLUSTER_NAME)

		#SUCCESS
		print "Cluster successfully deleted."
				
	else:
		print "PLEASE SELECT A CORRECT OPTION"
		parser.print_help()
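
# --- Usage sketch (not part of the original example; the script file name is a placeholder): the
# option parser above implies invocations along these lines.
#
#   python cdh_cluster.py --deploy --name MyCluster --hosts node1,node2,node3
#   python cdh_cluster.py --add    --name MyCluster --hosts node4
#   python cdh_cluster.py --remove --name MyCluster --hosts node4
#   python cdh_cluster.py --delete --name MyCluster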
Ejemplo n.º 40
0
from cm_api.api_client import ApiResource
from cm_api.endpoints.cms import ClouderaManager

cm_host = "127.0.0.1"

api = ApiResource(cm_host, username="******", password="******")

cms = ClouderaManager(api)

cmd = cms.get_service().restart()
cmd = cmd.wait()
print "Cloudera Manager Restart. Active: %s. Success: %s" % (cmd.active,
                                                             cmd.success)

cluster = api.get_cluster("Spark")
print cluster

restart_cluster = cluster.restart()
restart_cluster = restart_cluster.wait()
print "Cluster %s. Status - restart success: %s." % (cluster.name,
                                                     restart_cluster.success)

print "Cluster %s. Status - Configuration Stale -- Redeploying configurations" % cluster.name
redeploy_config = cluster.deploy_client_config()
redeploy_config = redeploy_config.wait()
print "New configuration success: %s." % redeploy_config.success
Ejemplo n.º 41
0
class DeployCloudEraCluster(object):
    """
    This class defines and sets up the base properties of the cluster nodes for the Hadoop ecosystem
    """
    _cloudera_manager_host = None
    _port_number = None
    _user_name = None
    _password = None
    _version = 12

    def __init__(self, cloudera_manager_host, port_number, user_name, password,
                 version):
        """
        Initialize the object to provision the cluster node for the hadoop parcel based provision
        :param cloudera_manager_host:
        :param port_number:
        :param user_name:
        :param password:
        :param version:
        """
        self._cloudera_manager_host = cloudera_manager_host
        self._port_number = port_number
        self._user_name = user_name
        self._password = password
        self._version = version  # API version vary depending upon the job you want to perform. "1" if you want to check the cluster and 12 if you want to export the property of config
        self._cloudera_manager_oconnect = ApiResource(
            self._cloudera_manager_host,
            self._port_number,
            self._user_name,
            self._password,
            version=self._version)

    def get_cluster_versions(self):
        """
        To get all the provisioned cluster versions registered with Cloudera Manager
        :return:
        """
        for cluster in self._cloudera_manager_oconnect.get_all_clusters():
            print("%s = %s" % (cluster.name, cluster.version))
        return cluster

    def get_cluster_services(self, cdh_version):
        """
        To get all the provisioned cluster services against the specific cluster
        :return:
        """
        for srv in cdh_version.get_all_services():
            print srv
            if srv.type == "HDFS":
                hdfs = srv
                print hdfs.name, hdfs.serviceState, hdfs.healthSummary
                print hdfs.serviceUrl
                for chk in hdfs.healthChecks:
                    print "%s --- %s" % (chk['name'], chk['summary'])

    def get_cluster_roles_info(self, cdh_version):
        """
        To get the details of all the roles for each cluster node
        :return:
        """
        for role in cdh_version.get_all_roles():
            if role.type == 'NAMENODE':
                namenode = role
        print "Role name: %s\nState: %s\nHealth: %s\nHost: %s" % (
            namenode.name, namenode.roleState, namenode.healthSummary,
            namenode.hostRef.hostId)

    def get_cdh_metrics_details(self, cdh_version):
        """
        To get the CDH metrics containing details about all the activities in the cluster node
        :param cdh_version:
        :return:
        """
        metrics = cdh_version.get_metrics()
        for metric in metrics:
            print "%s (%s)" % (metric.name, metric.unit)

    def start_service(self, cdh_service_name):
        """
        To restart the CDH service and wait for the restart to complete
        :param cdh_service_name:
        :return:
        """
        service = cdh_service_name.restart()
        print service.active

        service_status = service.wait()
        print "Active: %s. Success: %s" % (service_status.active,
                                           service_status.success)

    def restart_service(self, cdh_service_name, namenode):
        """
        To restart the service of the specific role
        :param cdh_service_name:
        :param namenode:
        :return:
        """
        commands = cdh_service_name.restart_roles(namenode.name)
        for command in commands:
            print command

    def configure_services(self, cdh_service_name):
        """
        To print the full configuration of the given service and its available settings
        :return:
        """
        for name, config in cdh_service_name.get_config(
                view='full')[0].items():
            print "%s - %s - %s" % (name, config.relatedName,
                                    config.description)

    def export_cluster_template(self, template_filename, cluster_name):
        """
        To export the current cluster configuration into the given file.
        :param template_filename:
        :return:
        """
        cluster = self._cloudera_manager_oconnect.get_cluster(cluster_name)
        cdh_template = cluster.export()
        with open(template_filename, 'w') as outfile:
            json.dump(cdh_template.to_json_dict(),
                      outfile,
                      indent=4,
                      sort_keys=True)

    def import_cluster_template(self, template_filename, cluster_name):
        """
        To import cluster template configuration into given cluster
        :param template_filename:
        :param cluster_name:
        :return:
        """
        cluster = self._cloudera_manager_oconnect.get_cluster(cluster_name)
        with open(template_filename) as data_file:
            data = json.load(data_file)
        template = ApiClusterTemplate(self._cloudera_manager_oconnect).from_json_dict(data, self._cloudera_manager_oconnect)
        cms = ClouderaManager(self._cloudera_manager_oconnect)
        command = cms.import_cluster_template(template)
        print(command)

    def deploy_cloudera_manager_services(self):
        """
        To deploy the cloudera manager services
        :return:
        """
        varEnableConfigAlerts = True
        varServiceGroupName = "cloudera-scm"
        varServiceUserName = "******"
        varMgmtServiceConfig = {
            'enable_config_alerts': varEnableConfigAlerts,
            'process_groupname': varServiceGroupName,
            'process_username': varServiceUserName,
        }
        varManager = self._cloudera_manager_oconnect.get_cloudera_manager()
        varMgmt = varManager.create_mgmt_service(ApiServiceSetupInfo())

        # update the cloudera service config
        varMgmt.update_config(varMgmtServiceConfig)

        # Get the cloudera services configured
        services = varManager.get_service()

        varMgmt.create_role("ACTIVITYMONITOR-1", "ACTIVITYMONITOR",
                            self._cloudera_manager_host)
        varMgmt.create_role("ALERTPUBLISHER-1", "ALERTPUBLISHER",
                            self._cloudera_manager_host)
        varMgmt.create_role("EVENTSERVER-1", "EVENTSERVER",
                            self._cloudera_manager_host)
        varMgmt.create_role("HOSTMONITOR-1", "HOSTMONITOR",
                            self._cloudera_manager_host)
        varMgmt.create_role("SERVICEMONITOR-1", "SERVICEMONITOR",
                            self._cloudera_manager_host)
        varMgmt.create_role("REPORTSMANAGER-1", "REPORTSMANAGER",
                            self._cloudera_manager_host)

    def deploy_activity_monitor(self):
        """
        To deploy the Activity monitor services
        :return:
        """
        varActivityMonitorPassword = "******"

        varMgmt = self._cloudera_manager_oconnect.get_service()

        # config for the activity monitoring
        varActivityMonitorConfig = {
            'firehose_database_host':
            "pocd-cm581-dev-manager.poc-d.internal" + ":" + "7432",
            'firehose_database_user':
            "******",
            'firehose_database_password':
            varActivityMonitorPassword,
            'firehose_database_type':
            "postgresql",
            'firehose_database_name':
            "amon",
            'firehose_heapsize':
            268435456,
            'mgmt_log_dir':
            "/opt/cloudera/log/cloudera-scm-firehose",
            'oom_heap_dump_dir':
            "/tmp",
            'oom_heap_dump_enabled':
            False,
            'max_log_backup_index':
            10,
            'max_log_size':
            100,
            'log_threshold':
            "INFO",
            'enable_config_alerts':
            "true",
        }
        varRole = varMgmt.get_role("ACTIVITYMONITOR-1")
        varRole.update_config(varActivityMonitorConfig)

    def deploy_alert_publisher(self):
        """
        To deploy the alert publisher
        :return:
        """
        varMgmt = self._cloudera_manager_oconnect.get_service()
        varAlertPublisherConfig = {
            'alert_heapsize': 268435456,
            'mgmt_log_dir': "/opt/cloudera/log/cloudera-scm-alertpublisher",
            'oom_heap_dump_dir': "/tmp",
            'oom_heap_dump_enabled': False,
            'max_log_backup_index': 10,
            'max_log_size': 100,
            'log_threshold': "INFO",
            'enable_config_alerts': True,
        }
        varRole = varMgmt.get_role("ALERTPUBLISHER-1")
        varRole.update_config(varAlertPublisherConfig)

    def deploy_event_server(self):
        """
        To deploy event server
        :return:
        """
        varMgmt = self._cloudera_manager_oconnect.get_service()
        varEventServerConfig = {
            'event_server_heapsize': 268435456,
            'mgmt_log_dir': "/opt/cloudera/log/cloudera-scm-eventserver",
            'eventserver_index_dir':
            "/opt/cloudera/lib/cloudera-scm-eventserver",
            'oom_heap_dump_dir': "/tmp",
            'oom_heap_dump_enabled': False,
            'max_log_backup_index': 10,
            'max_log_size': 100,
            'log_threshold': "INFO",
            'enable_config_alerts': True,
        }
        varRole = varMgmt.get_role("EVENTSERVER-1")
        varRole.update_config(varEventServerConfig)

    def deploy_host_monitor(self):
        """
        To deploy host monitor
        :return:
        """
        varMgmt = self._cloudera_manager_oconnect.get_service()
        varHostMonitorConfig = {
            'firehose_heapsize': 268435456,
            'mgmt_log_dir': "/opt/cloudera/log/cloudera-scm-firehose",
            'firehose_storage_dir': "/opt/cloudera/lib/cloudera-host-monitor",
            'oom_heap_dump_dir': "/tmp",
            'oom_heap_dump_enabled': False,
            'max_log_backup_index': 10,
            'max_log_size': 100,
            'log_threshold': "INFO",
            'enable_config_alerts': True,
        }
        varRole = varMgmt.get_role("HOSTMONITOR-1")
        varRole.update_config(varHostMonitorConfig)

    def deploy_service_monitor(self):
        """
        To deploy the service monitor
        :return:
        """
        varMgmt = self._cloudera_manager_oconnect.get_service()
        varServiceMonitorConfig = {
            'firehose_heapsize': 268435456,
            'mgmt_log_dir': "/opt/cloudera/log/cloudera-scm-firehose",
            'firehose_storage_dir':
            "/opt/cloudera/lib/cloudera-service-monitor",
            'oom_heap_dump_dir': "/tmp",
            'oom_heap_dump_enabled': False,
            'max_log_backup_index': 10,
            'max_log_size': 100,
            'log_threshold': "INFO",
            'enable_config_alerts': True,
        }

        varRole = varMgmt.get_role("SERVICEMONITOR-1")
        varRole.update_config(varServiceMonitorConfig)

    def deploy_report_manager(self):
        """
        To deploy the service Report Manager
        :return:
        """
        varReportManagerPassword = "******"
        varMgmt = self._cloudera_manager_oconnect.get_service()
        varReportManagerConfig = {
            'headlamp_database_host':
            "pocd-cm581-dev-manager.poc-d.internal" + ":" + "7432",
            'headlamp_database_user':
            "******",
            'headlamp_database_password':
            varReportManagerPassword,
            'headlamp_database_type':
            "postgresql",
            'headlamp_database_name':
            "rman",
            'headlamp_heapsize':
            536870912,
            'mgmt_log_dir':
            "/opt/cloudera/log/cloudera-scm-headlamp",
            'headlamp_scratch_dir':
            "/opt/cloudera/lib/cloudera-scm-headlamp",
            'oom_heap_dump_dir':
            "/tmp",
            'oom_heap_dump_enabled':
            False,
            'max_log_backup_index':
            10,
            'max_log_size':
            100,
            'log_threshold':
            "INFO",
            'enable_config_alerts':
            True,
        }
        varRole = varMgmt.get_role("REPORTSMANAGER-1")
        varRole.update_config(varReportManagerConfig)

    def deploy_services(self):
        """
        To deploy all the cloudera manager services
        :return:
        """
        varMgmt = self._cloudera_manager_oconnect.get_service()
        varMgmt.start().wait()

    def create_hadoop_cluster(self):
        """
        To create hadoop cluster with multiple data and name nodes and configure different services
        :return:
        """
        varClusterName = "POC-D Cluster"
        varCDHVersion = "CDH5"
        varCDHFullVersion = "5.8.0"

        varCluster = self._cloudera_manager_oconnect.create_cluster(
            varClusterName, varCDHVersion, varCDHFullVersion)
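
# --- Usage sketch (not part of the original example; host, credentials and cluster names are
# placeholders): exporting a cluster template with the class above and importing it elsewhere.
if __name__ == '__main__':
    deployer = DeployCloudEraCluster('cm-host.example.com', 7180, 'admin', 'admin', 12)
    deployer.export_cluster_template('/tmp/cluster_template.json', 'Cluster 1')
    # edit the JSON as needed, then apply it to another cluster:
    # deployer.import_cluster_template('/tmp/cluster_template.json', 'Cluster 2')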
Ejemplo n.º 42
0
#!/usr/bin/env python

import sys
from cm_api.api_client import ApiResource
from cm_api.api_client import ApiException

CMD_TIMEOUT = 180
api = ApiResource(sys.argv[1], username="******", password="******", use_tls=False, version=4)
cluster = api.get_cluster(sys.argv[2])

try:
    flume = cluster.get_service("flume1")
except ApiException:
    flume = cluster.create_service("flume1", "FLUME")

for i in xrange(3, len(sys.argv)):
    name = "flume-agent" + str(i - 2)
    try:
        flume.get_role(name)
    except ApiException:
        flume.create_role(name, "AGENT", sys.argv[i])

flume_service_config = {
    'hbase_service': 'hbase1',
    'hdfs_service': 'hdfs1'
}


flume.update_config(svc_config=flume_service_config)
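
# A possible follow-up (a sketch, not part of the original script): start the new
# Flume service and block until the command completes, using the CMD_TIMEOUT
# defined above as the polling limit.
cmd = flume.start()
cmd = cmd.wait(timeout=CMD_TIMEOUT)
print "Flume start finished. Active: %s. Success: %s" % (cmd.active, cmd.success)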

Ejemplo n.º 43
0
def api_data_collection(request):
    """
        Application information collection restful api. Query completed application information on specific conditions and accumulate it.
        @method: GET
        @param from_time: Application finish time after this time. format : "%d/%m/%Y %H:%M". time zone=UTC+8
        @param end_time: Application finish time before this time. format : "%d/%m/%Y %H:%M". time zone=UTC+8
        @param queue_name: Query completed application information on specific queue name.
        @param app_type: Query completed application information on specific application type.
        @param app_state: Query completed application information on specific application states. specified as a comma-separated list. ex: FINISHED,FAILED,KILLED
        @return: json data
                    { "success":False, "message":"error message" }
                    { "success":True, "message": { "queue_view":{...}, "group_view":{...} } }
        """
    if request.method == "GET":
        response = {'success':False, 'message':''}
        filter_dict = {}
        if "queue_name" in request.GET:
            filter_dict['queue_name'] = request.GET.get('queue_name')
        if "app_type" in request.GET:
            filter_dict['app_type'] = request.GET.get('app_type')
        if "app_state" in request.GET:
            filter_dict['app_state'] = request.GET.get('app_state').split(',')
        #
        # time zone = Asia/Taipei = UTC+8
        from_time = datetime.strptime(request.GET.get('from_time'), "%d/%m/%Y %H:%M") - timedelta(hours=8)
        to_time = datetime.strptime(request.GET.get('end_time'), "%d/%m/%Y %H:%M") - timedelta(hours=8)
        #
        # get config
        config = ConfigParser.ConfigParser()
        config.read( os.path.join(settings.BASE_DIR, "cluster.ini") )
        cm_host = config.get("CM", "cm.host")
        cm_port = config.get("CM", "cm.port")
        cm_version = config.get("CM", "cm.version")
        cm_username = config.get("CM", "cm.username")
        cm_password = config.get("CM", "cm.password")
        #
        cluster_name = config.get("Cluster", "cluster.name")
        yarn_name = config.get("Cluster", "cluster.yarn.name")
        #
        ldap_host = config.get("Ldap", "ldap.host")
        ldap_username = config.get("Ldap", "ldap.username")
        ldap_password = config.get("Ldap", "ldap.password")
        #
        # get active resource manager info
        try:
            cm_api = ApiResource( cm_host, int(cm_port), username=cm_username, password=cm_password, version=int(cm_version) )
            cm_cluster_obj = cm_api.get_cluster(name=cluster_name)
            cm_yarn_obj = cm_cluster_obj.get_service(name=yarn_name)
            #
            find_active_rm = False
            for rm in cm_yarn_obj.get_roles_by_type(role_type="RESOURCEMANAGER"):
                if rm.haStatus == "ACTIVE":
                    host = cm_api.get_host(rm.hostRef.hostId)
                    active_rm_ip = host.ipAddress
                    active_rm_port = 8088
                    find_active_rm = True
            #
            if not find_active_rm:
                message = "can not find active rm"
                print( "[ERROR] " + message )
                response['success'] = False
                response['message'] = message
                return HttpResponse( json.dumps(response) )
        except Exception, e:
            message = "can not get cm yarn object"
            print( "[ERROR] " + message + str(e) )
            response['success'] = False
            response['message'] = message
            return HttpResponse( json.dumps(response) )
        #
        # all application statistics
        statistics_response = applications_statistics(active_rm_ip, active_rm_port, from_time, to_time, filter_dict)
        if statistics_response['success']:
            #
            # create ldap connection. access ldap to get group of account
            ldap_connection = create_ldap_connection(ldap_host, ldap_username, ldap_password)
            if not ldap_connection:
                message = "can not connect to ldap://" + ldap_host
                response['success'] = False
                response['message'] = message
                return HttpResponse( json.dumps(response) )
            #
            # init queue view result & group view result
            queue_view_final_result = statistics_response['message']
            group_view_final_result = {}
            #
            #
            # add group information to queue view result and accumulate the result by group
            for queue, queue_info in queue_view_final_result.items():
                #
                queue_view_final_result[queue]['group'] = ''
                # queue naming : root.SYSTEM.<account> , root.PERSONAL.<account>
                m = re.match(r"(?P<root>\w+)\.(?P<second>\w+)\.(?P<third>\w+)", queue)
                if m and m.group('root') == 'root' and ( m.group('second') == 'SYSTEM' or m.group('second') == 'PERSONAL' ):
                    queue_view_final_result[queue]['account'] = m.group('third')
                    group_query_result = query_group_of_user(ldap_connection, queue_view_final_result[queue]['account'])
                    group = group_query_result['group']
                    project_name = group_query_result['name']
                    queue_view_final_result[queue]['group'] = group
                    if not group_view_final_result.has_key(group):
                        group_view_final_result[group] = { 'apps':{}, 'queues':[], 'name':project_name }
                    group_view_final_result[group]['queues'].append(queue)
                    #
                    for app_type, app_info in queue_info['apps'].items():
                        for app_state, data in app_info['final_status'].items():
                            if not group_view_final_result[group]['apps'].has_key(app_state):
                                group_view_final_result[group]['apps'][app_state] = {}
                            for key in data:
                                if not group_view_final_result[group]['apps'][app_state].has_key(key):
                                    group_view_final_result[group]['apps'][app_state][key] = data[key]
                                else:
                                    group_view_final_result[group]['apps'][app_state][key] += data[key]
            #
            # after finish to accumulate all result, unbind ldap connection
            ldap_connection.unbind()
        else:
            response['success'] = False
            response['message'] = statistics_response['message']
            return HttpResponse( json.dumps(response) )
        #
        # transform duration type from datetime.timedelta to string
        queue_view_final_result = transform_queue_view_response(queue_view_final_result)
        group_view_final_result = transform_project_view_response(group_view_final_result)
        #
        response['success'] = True
        response['message'] = {}
        response['message']['queue_view'] = queue_view_final_result
        response['message']['group_view'] = group_view_final_result
        print "[DEBUG] response = " + json.dumps(response)
        return HttpResponse( json.dumps(response) )
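
# A minimal client-side sketch for the view above (not part of the original code).
# The URL route, host and queue name are placeholders; the query parameters and the
# response shape ("success"/"message" with queue_view and group_view) follow the docstring.
import json
import requests

params = {
    'from_time': '01/06/2020 00:00',   # "%d/%m/%Y %H:%M", UTC+8
    'end_time': '02/06/2020 00:00',
    'queue_name': 'root.SYSTEM.alice',
    'app_state': 'FINISHED,FAILED,KILLED',
}
resp = requests.get('http://dashboard.example.com/api/data_collection', params=params)
data = json.loads(resp.text)
if data['success']:
    print data['message']['queue_view']
    print data['message']['group_view']
else:
    print data['message']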
Ejemplo n.º 44
0
        print >> sys.stderr, '\n\tPlease specify a database and a table.'
        usage()
        return cf['RET_BADOPTS']

    API = ApiResource(cmHost,
                      cf['CM_PORT'],
                      version=cf['CM_VERSION'],
                      username=cf['CM_USER'],
                      password=cf['CM_PASSWD'],
                      use_tls=True)
    LOG.debug('Connected to CM host on ' + cmHost)

    procUser = getUsername()
    LOG.debug('Process effective username is ' + procUser)

    cluster = API.get_cluster(cf['CLUSTER_NAME'])

    prod_nav = {
        'proto': cf['PROD_NAV_PROTO'],
        'host': cf['PROD_NAV_HOST'],
        'port': cf['PROD_NAV_PORT'],
        'user': cf['PROD_NAV_USER'],
        'passwd': cf['PROD_NAV_PASSWD']
    }
    dr_nav = {
        'proto': cf['DR_NAV_PROTO'],
        'host': cf['DR_NAV_HOST'],
        'port': cf['DR_NAV_PORT'],
        'user': cf['DR_NAV_USER'],
        'passwd': cf['DR_NAV_PASSWD']
    }
Ejemplo n.º 45
0
import json
from cm_api.api_client import ApiResource
from cm_api.endpoints.types import ApiClusterTemplate
from cm_api.endpoints.cms import ClouderaManager

resource = ApiResource(
    "4d92d0ab-2fa6-4d9d-bef5-dbf0f5dc29ab.priv.cloud.scaleway.com",
    7180,
    "admin",
    "admin",
    version=12)
cluster = resource.get_cluster("Cluster 2")
template = cluster.export()
with open('/tmp/template.json', 'w') as outfile:
    json.dump(template.to_json_dict(), outfile, indent=4, sort_keys=True)
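
# The exported JSON can later be re-imported on another Cloudera Manager. A sketch
# of the reverse operation, reusing the ApiClusterTemplate and ClouderaManager
# imports above (host and credentials are placeholders):
import json
from cm_api.api_client import ApiResource
from cm_api.endpoints.types import ApiClusterTemplate
from cm_api.endpoints.cms import ClouderaManager

resource = ApiResource("target-cm.example.com", 7180, "admin", "admin", version=12)
with open('/tmp/template.json') as infile:
    template = ApiClusterTemplate(resource).from_json_dict(json.load(infile), resource)
cms = ClouderaManager(resource)
cms.import_cluster_template(template).wait()
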
def main():
    configfile=''

    if len(sys.argv) != 3:
        print("Usage: %s -i configfile " % sys.argv[0])
        sys.exit(2)

    try:
        myopts, args = getopt.getopt(sys.argv[1:],"i:h")
    except getopt.GetoptError as e:
        print (str(e))
        print("Usage: %s -i configfile " % sys.argv[0])
        sys.exit(2)

    for o, a in myopts:
        if o == '-i':
            configfile=a
        elif o == '-h':
            print("Usage: %s -i configfile " % sys.argv[0])

    if os.path.isfile(configfile):
        print "processing configuration file...."
        pass
    else:
        print "file does not exist..."
        sys.exit(2)


    config = ConfigObj(configfile)
    cluster_name = config['cluster']['name']
    cdh_manager = config['cluster']['cdh_manager']
    cm_hostname = config['cluster']['cm_hostname']
    hostnames = config['cluster']['server_hostnames']
    services = config['cluster']['services']
    server_rack = config['cluster']['server_rack']
    server_login = config['cluster']['server_login']
    server_passwd = config['cluster']['server_passwd']
    server_key = config['cluster']['server_key']
    server_passphrase = config['cluster']['server_passphrase']
    cloudera_manager_repo = config['cluster']['cloudera_manager_repo']

    cm_host = cdh_manager
    api = ApiResource(cm_host, username="******", password="******")
    #print config['hive']['config']['hive_metastore_database_name']


    for c in api.get_all_clusters():
        if c.name == cluster_name:
            #cluster = c
            print "Cluster %s already exists " % (cluster_name)
            print "Please manually delete the cluster %s, all hosts and associated services." % (cluster_name)
            sys.exit(0)

    print "Starting the automation process..."


    cdhproc(cluster_name,api,hostnames,server_rack,server_login,server_passwd,server_key,server_passphrase,cloudera_manager_repo)
    createMGMT(api,cm_hostname,server_login,server_passwd,server_passphrase,server_key)
    deployHDFSMAP(cluster_name,api,configfile)

    if "yarn" in services:
        createYarn(cluster_name,api,configfile)
    if "zookeeper" in services:
        createZookeeper(cluster_name,api,configfile)
    if "hive" in services:
        createHive(cluster_name,api,configfile)
    if "hbase" in services:
        createHbase(cluster_name,api,configfile)
    if "spark" in services:
        createSpark(cluster_name,api,configfile)
    if "impala" in services:
        createImpala(cluster_name,api,configfile)

    cluster = api.get_cluster(cluster_name)

    print "Stopping cluster..."
    cmd = cluster.stop().wait()
    print "Active: %s. Success: %s" % (cmd.active, cmd.success)
    print "Starting cluster..."
    cmd =cluster.start().wait()
    print "Active: %s. Success: %s" % (cmd.active, cmd.success)

    if "solr" in services:
        createSolr(cluster_name,api,configfile)
    if "flume" in services:
        createFlume(cluster_name,api,configfile)
    if "oozie" in services:
        createOozie(cluster_name,api,configfile)
    if "sqoop" in services:
        createSqoop(cluster_name,api,configfile)
    if "hue" in services:
        createHue(cluster_name,api,configfile)

    #print "Stopping cluster..."
    #cmd = cluster.stop().wait()
    #print "Active: %s. Success: %s" % (cmd.active, cmd.success)
    #print "Starting cluster..."
    #cmd =cluster.start().wait
    #print "Active: %s. Success: %s" % (cmd.active, cmd.success)


    print "Cluster deployed successfully...."
    print "Login to: http://"+cdh_manager+":7180"
if len(sys.argv) != 2:
  print "Error: Wrong number of arguments"
  print "Usage: create-flume-service.py <flume-service-name>"
  print "Example: create-flume-service.py  Flume"
  quit(1)

## Name of Flume Service to create
flume_service_name = sys.argv[1]

## Connect to CM
print "\nConnecting to Cloudera Manager at " + cm_host + ":" + cm_port + "..."
api = ApiResource(server_host=cm_host, server_port=cm_port, username=cm_login, password=cm_password, version=cm_api_version)
print "Connection is good!"

## Get the Cluster 
cluster = api.get_cluster(cluster_name)

## Get the existing Services
service_list = cluster.get_all_services()

## Check that a FLUME service does not already exist
## You could skip this check if you want to have more than one FLUME service on your cluster
## I included the check just as a safeguard in case you only want one FLUME service on your cluster
for service in service_list:
  if service.type == "FLUME":
    print "Error: A FLUME Service already exists (Service Name: '" + service.name + "')"
    print "Aborting..."
    exit(1)

## Check that there is an HDFS Service with the given name unless hdfs_service_name was specified as 'none'
if hdfs_service_name != "none":
Ejemplo n.º 48
0
def get_cluster():
    # connect to cloudera manager
    api = ApiResource(CM_HOST, username="******", password="******")
    # Take care of the case where cluster name has changed
    # Hopefully users wouldn't use this CM to deploy another cluster manually
    return (api, api.get_cluster(api.get_all_clusters()[0].name))
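
# Example use of the helper above (a sketch): grab the handles and list the services.
api, cluster = get_cluster()
for service in cluster.get_all_services():
    print service.name, service.type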
Ejemplo n.º 49
0
    def runner(self, args, display=True):
        values = []
        health_values = []

        plugin_args = args.split() \
                    if args is not None and (len(args.strip()) > 0) \
                    else ""

        options = self.read_args(plugin_args)

        if options.hadoopdistro == 'CDH':
            api = ApiResource(server_host=options.cmhost, \
                            server_port=options.cmport, \
                            username=options.cmuser, \
                            password=options.cmpassword, \
                            version=11)
            cluster = api.get_cluster(api.get_all_clusters()[0].name)
            cdh = CDHData(api, cluster)
        else:
            cdh = HDPData(options.cmhost, options.cmuser, options.cmpassword)
        hbase = None

        def run_test_sequence():
            # pylint: disable=too-many-return-statements
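            # NOTE: this assigns to a local 'hbase' inside run_test_sequence(); the
            # outer 'hbase' defined in runner() stays None, so the close() after the
            # thread join does not actually close this connection.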
            hbase = happybase.Connection(host=cdh.get_hbase_endpoint())
            if abort_test_sequence is True:
                return
            reason = []
            try:
                start = TIMESTAMP_MILLIS()

                try:
                    hbase.create_table('blackbox_test_table', {'cf': dict()})
                    logging.debug("test table created")
                except AlreadyExists:
                    logging.debug("test table exists")

                table = hbase.table('blackbox_test_table')
                end = TIMESTAMP_MILLIS()
                create_table_ok = True
                create_table_ms = end - start
                values.append(
                    Event(TIMESTAMP_MILLIS(), cdh.get_name('HBASE'),
                          "hadoop.HBASE.create_table_time_ms", [],
                          create_table_ms))
            except:
                LOGGER.error(traceback.format_exc())
                create_table_ok = False
                reason = ['Create HBase table operation failed']
            health_values.append(
                Event(TIMESTAMP_MILLIS(), cdh.get_name('HBASE'),
                      "hadoop.HBASE.create_table_succeeded", reason,
                      create_table_ok))

            #write some data to it
            if abort_test_sequence is True:
                return
            reason = []
            try:
                start = TIMESTAMP_MILLIS()
                table.put('row_key', {'cf:column': 'value'})
                end = TIMESTAMP_MILLIS()
                write_hbase_ok = True
                write_hbase_ms = end - start
                values.append(
                    Event(TIMESTAMP_MILLIS(), cdh.get_name('HBASE'),
                          "hadoop.HBASE.write_time_ms", [], write_hbase_ms))
            except:
                LOGGER.error(traceback.format_exc())
                write_hbase_ok = False
                reason = ['Failed to insert row in HBase table']
            health_values.append(
                Event(TIMESTAMP_MILLIS(), cdh.get_name('HBASE'),
                      "hadoop.HBASE.write_succeeded", reason, write_hbase_ok))

            #read some data from it
            if abort_test_sequence is True:
                return
            reason = []
            try:
                start = TIMESTAMP_MILLIS()
                row = table.row('row_key', columns=['cf:column'])
                end = TIMESTAMP_MILLIS()
                read_hbase_ms = end - start
                read_hbase_ok = row['cf:column'] == 'value'
                values.append(
                    Event(TIMESTAMP_MILLIS(), cdh.get_name('HBASE'),
                          "hadoop.HBASE.read_time_ms", [], read_hbase_ms))
            except:
                LOGGER.error(traceback.format_exc())
                hbase_fix_output = subprocess.check_output([
                    'sudo', '-u', 'hbase', 'hbase', 'hbck', '-repair',
                    'blackbox_test_table'
                ])
                for line in hbase_fix_output.splitlines():
                    if 'Status:' in line or 'inconsistencies detected' in line:
                        LOGGER.debug(line)
                subprocess.check_output([
                    'sudo', '-u', 'hbase', 'hbase', 'zkcli', 'rmr',
                    '/hbase/table/blackbox_test_table'
                ])
                subprocess.check_output([
                    'sudo', '-u', 'hdfs', 'hadoop', 'fs', '-rm', '-r', '-f',
                    '-skipTrash', '/hbase/data/default/blackbox_test_table'
                ])
                read_hbase_ok = False
                reason = ['Failed to fetch row by row key from HBase']
            health_values.append(
                Event(TIMESTAMP_MILLIS(), cdh.get_name('HBASE'),
                      "hadoop.HBASE.read_succeeded", reason, read_hbase_ok))

            #create some hive metadata
            reason = []
            if abort_test_sequence is True:
                return
            try:
                start = TIMESTAMP_MILLIS()
                hive = hive_api.connect(cdh.get_hive_endpoint())
                end = TIMESTAMP_MILLIS()
                hive.cursor().execute("DROP TABLE blackbox_test_table")
                connect_to_hive_ms = end - start
                connect_to_hive_ok = True
                values.append(
                    Event(TIMESTAMP_MILLIS(), cdh.get_name('HIVE'),
                          "hadoop.HIVE.connection_time_ms", [],
                          connect_to_hive_ms))
            except:
                LOGGER.error(traceback.format_exc())
                connect_to_hive_ok = False
                reason = ['Failed to connect to Hive Metastore']
            health_values.append(
                Event(TIMESTAMP_MILLIS(), cdh.get_name('HIVE'),
                      "hadoop.HIVE.connection_succeeded", reason,
                      connect_to_hive_ok))

            if abort_test_sequence is True:
                return
            reason = []
            try:
                start = TIMESTAMP_MILLIS()
                hive.cursor().execute((
                    "CREATE EXTERNAL TABLE "
                    "blackbox_test_table (key STRING, value STRING)"
                    "STORED BY \"org.apache.hadoop.hive.hbase.HBaseStorageHandler\" "
                    "WITH SERDEPROPERTIES "
                    "(\"hbase.columns.mapping\" = \":key,cf:column\") "
                    "TBLPROPERTIES(\"hbase.table.name\" = \"blackbox_test_table\")"
                ))
                end = TIMESTAMP_MILLIS()
                create_metadata_ms = end - start
                create_metadata_ok = True
                values.append(
                    Event(TIMESTAMP_MILLIS(), cdh.get_name('HIVE'),
                          "hadoop.HIVE.create_metadata_time_ms", [],
                          create_metadata_ms))
            except:
                LOGGER.error(traceback.format_exc())
                create_metadata_ok = False
                reason = [
                    'CREATE EXTERNAL TABLE statement failed on Hive Metastore'
                ]
            health_values.append(
                Event(TIMESTAMP_MILLIS(), cdh.get_name('HIVE'),
                      "hadoop.HIVE.create_metadata_succeeded", reason,
                      create_metadata_ok))

            #read some data via impala using it
            if abort_test_sequence is True:
                return

            if cdh.get_impala_endpoint() is not None:
                reason = []
                try:
                    start = TIMESTAMP_MILLIS()
                    impala = connect(host=cdh.get_impala_endpoint(),
                                     port=options.impalaport)
                    end = TIMESTAMP_MILLIS()
                    impala.cursor().execute("invalidate metadata")
                    connect_to_impala_ms = end - start
                    connect_to_impala_ok = True
                    values.append(
                        Event(TIMESTAMP_MILLIS(), cdh.get_name('IMPALA'),
                              "hadoop.IMPALA.connection_time_ms", [],
                              connect_to_impala_ms))
                except:
                    LOGGER.error(traceback.format_exc())
                    connect_to_impala_ok = False
                    reason = ['Failed to connect to Impala']
                health_values.append(
                    Event(TIMESTAMP_MILLIS(), cdh.get_name('IMPALA'),
                          "hadoop.IMPALA.connection_succeeded", reason,
                          connect_to_impala_ok))

                if abort_test_sequence is True:
                    return
                reason = []
                try:
                    start = TIMESTAMP_MILLIS()
                    impala_cursor = impala.cursor()
                    impala_cursor.execute("SELECT * FROM blackbox_test_table")
                    table_contents = impala_cursor.fetchall()
                    end = TIMESTAMP_MILLIS()
                    read_impala_ms = end - start
                    read_impala_ok = table_contents[0][1] == 'value'
                    values.append(
                        Event(TIMESTAMP_MILLIS(), cdh.get_name('IMPALA'),
                              "hadoop.IMPALA.read_time_ms", [],
                              read_impala_ms))
                except:
                    LOGGER.error(traceback.format_exc())
                    read_impala_ok = False
                    reason = ['Failed to SELECT from Impala']
                health_values.append(
                    Event(TIMESTAMP_MILLIS(), cdh.get_name('IMPALA'),
                          "hadoop.IMPALA.read_succeeded", reason,
                          read_impala_ok))
            else:
                reason = []
                try:
                    start = TIMESTAMP_MILLIS()
                    hive_cursor = hive.cursor()
                    hive_cursor.execute("SELECT * FROM blackbox_test_table")
                    table_contents = hive_cursor.fetchall()
                    end = TIMESTAMP_MILLIS()
                    read_hive_ms = end - start
                    read_hive_ok = table_contents[0][1] == 'value'
                    values.append(
                        Event(TIMESTAMP_MILLIS(), cdh.get_name('HQUERY'),
                              "hadoop.HQUERY.read_time_ms", [], read_hive_ms))
                except:
                    LOGGER.error(traceback.format_exc())
                    read_hive_ok = False
                    reason = ['Failed to SELECT from Hive']
                health_values.append(
                    Event(TIMESTAMP_MILLIS(), cdh.get_name('HQUERY'),
                          "hadoop.HQUERY.read_succeeded", reason,
                          read_hive_ok))

            #delete metadata
            if abort_test_sequence is True:
                return
            reason = []
            try:
                start = TIMESTAMP_MILLIS()
                hive.cursor().execute("DROP TABLE blackbox_test_table")
                end = TIMESTAMP_MILLIS()
                drop_metadata_ms = end - start
                drop_metadata_ok = True
                values.append(
                    Event(TIMESTAMP_MILLIS(), cdh.get_name('HIVE'),
                          "hadoop.HIVE.drop_table_time_ms", [],
                          drop_metadata_ms))
            except:
                LOGGER.error(traceback.format_exc())
                drop_metadata_ok = False
                reason = ['Failed to DROP table in Hive Metastore']
            health_values.append(
                Event(TIMESTAMP_MILLIS(), cdh.get_name('HIVE'),
                      "hadoop.HIVE.drop_table_succeeded", reason,
                      drop_metadata_ok))

            #delete hbase table
            if abort_test_sequence is True:
                return
            reason = []
            try:
                start = TIMESTAMP_MILLIS()
                # Disabled deleting table to work around apparent hbase bug (see VPP-17) but leaving
                # test step in so it can be easily re-enabled for testing.
                #hbase.disable_table('blackbox_test_table')
                #hbase.delete_table('blackbox_test_table')
                end = TIMESTAMP_MILLIS()
                drop_table_ms = end - start
                drop_table_ok = True
                values.append(
                    Event(TIMESTAMP_MILLIS(), cdh.get_name('HBASE'),
                          "hadoop.HBASE.drop_table_time_ms", [],
                          drop_table_ms))
            except:
                LOGGER.error(traceback.format_exc())
                drop_table_ok = False
                reason = ['Failed to drop table in HBase']
            health_values.append(
                Event(TIMESTAMP_MILLIS(), cdh.get_name('HBASE'),
                      "hadoop.HBASE.drop_table_succeeded", reason,
                      drop_table_ok))

        def to_status(flag):
            '''
            Convert True to OK and False to ERROR
            '''
            if flag in [True, False]:
                status = 'OK' if flag is True else 'ERROR'
            else:
                status = flag

            return status

        def default_health_value(name, service, operation, failed_step):
            result = False
            if len([event for event in health_values
                    if event.metric == name]) == 0:
                if failed_step is not None:
                    message = 'Did not attempt to %s due to timeout waiting for: %s' % (
                        operation, failed_step)
                else:
                    message = 'Timed out waiting for %s to complete' % operation

                health_values.append(
                    Event(TIMESTAMP_MILLIS(), cdh.get_name(service), name,
                          [message], False))
                result = True
            return result

        test_thread = threading.Thread(target=run_test_sequence)
        test_thread.daemon = True
        abort_test_sequence = False
        test_thread.start()
        test_thread.join(60.0)
        abort_test_sequence = True
        if hbase is not None:
            hbase.close()

        failed_step = None
        if default_health_value("hadoop.HBASE.create_table_succeeded", "HBASE",
                                "create HBase table",
                                failed_step) and failed_step is None:
            failed_step = "create HBase table"
        if default_health_value("hadoop.HBASE.write_succeeded", "HBASE",
                                "write to HBase",
                                failed_step) and failed_step is None:
            failed_step = "write to HBase"
        if default_health_value("hadoop.HBASE.read_succeeded", "HBASE",
                                "read from HBase",
                                failed_step) and failed_step is None:
            failed_step = "read from HBase"
        if default_health_value("hadoop.HIVE.connection_succeeded", "HIVE",
                                "connect to Hive Metastore",
                                failed_step) and failed_step is None:
            failed_step = "connect to Hive Metastore"
        if default_health_value("hadoop.HIVE.create_metadata_succeeded",
                                "HIVE", "create Hive Metastore table",
                                failed_step) and failed_step is None:
            failed_step = "create Hive Metastore table"
        if cdh.get_impala_endpoint() is not None:
            if default_health_value("hadoop.IMPALA.connection_succeeded",
                                    "IMPALA", "connect to Impala",
                                    failed_step) and failed_step is None:
                failed_step = "connect to Impala"
            if default_health_value("hadoop.IMPALA.read_succeeded", "IMPALA",
                                    "SELECT from Impala",
                                    failed_step) and failed_step is None:
                failed_step = "SELECT from Impala"
        else:
            if default_health_value("hadoop.HQUERY.read_succeeded", "HQUERY",
                                    "SELECT from Hive",
                                    failed_step) and failed_step is None:
                failed_step = "SELECT from Hive"
        if default_health_value("hadoop.HIVE.drop_table_succeeded", "HIVE",
                                "DROP table in Hive Metastore",
                                failed_step) and failed_step is None:
            failed_step = "DROP table in Hive Metastore"
        if default_health_value("hadoop.HBASE.drop_table_succeeded", "HBASE",
                                "drop table in HBase",
                                failed_step) and failed_step is None:
            failed_step = "drop table in HBase"

        cdh_status_indicators = cdh.get_status_indicators()
        health_values.extend(cdh_status_indicators)
        overall = {}
        for health_val in health_values:
            try:
                current = overall[health_val.source]
                current_val = to_status(current.value)
                current_causes = current.causes
            except KeyError:
                current_val = 'OK'
                current_causes = []

            update = to_status(health_val.value)

            # If current is ERROR, output is ERROR, regardless
            # If current is WARN, output is WARN if update is OK but ERROR if further WARN or ERROR
            # If update is OK, output is OK if OK, WARN if WARN and ERROR if ERROR
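            # e.g. current OK + update WARN -> WARN; current WARN + update OK -> WARN;
            # current WARN + update WARN -> ERROR; anything + update ERROR -> ERROR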

            out = 'ERROR'
            if current_val != "ERROR":
                if current_val == 'WARN':
                    if update == 'OK':
                        out = 'WARN'
                if current_val == 'OK':
                    out = update
            current_val = out
            current_causes.extend(health_val.causes)

            overall[health_val.source] = Event(
                health_val.timestamp, health_val.source,
                'hadoop.%s.health' % cdh.get_type(health_val.source),
                current_causes, current_val)

        values.extend(health_values)
        values.extend(overall.values())

        if display:
            self._do_display(values)

        return values
Ejemplo n.º 50
0
class Deploy:
    def __init__(self,
                 cm_port='7180',
                 cm_user='******',
                 cm_passwd='admin',
                 cluster_name='cluster1'):

        self.cluster_name = cluster_name
        self.cdh_version = "CDH5"

        self.cfg = ParseConfig()
        self.host_list = self.cfg.get_hosts()

        self._get_host_allocate()
        self.cm_host = self.host_list[0]

        self.api = ApiResource(self.cm_host,
                               cm_port,
                               cm_user,
                               cm_passwd,
                               version=7)
        self.cm = self.api.get_cloudera_manager()

        try:
            self.cluster = self.api.get_cluster(self.cluster_name)
        except:
            try:
                self.cluster = self.api.create_cluster(self.cluster_name,
                                                       self.cdh_version)
            except:
                err('Cannot connect to cloudera manager on %s' % self.cm_host)

        # add all our hosts to the cluster
        try:
            self.cluster.add_hosts(self.host_list)
            info('Add hosts successfully')
        except Exception as e:
            if e.code == 400:
                info('Already Added hosts')
            elif e.code == 404:
                err(e.message)

    def _auto_allocate(self, hosts):
        # enable mgmt node if node count is larger than mgmt_th
        mgmt_th = 6

        if type(hosts) != list: err('hosts parameter should be a list')
        host_num = len(hosts)
        # node<=3, ZK=1 ,node>3, ZK=3
        zk_num = 1 if host_num <= 3 else 3

        # with mgmt node
        if host_num >= mgmt_th:
            self.ap_host = self.es_host = self.ho_host = self.sm_host = \
                self.nn_host = self.hm_host = self.jt_host = hosts[0]
            self.dn_hosts = self.rs_hosts = self.tt_hosts = hosts[1:]
            self.snn_host = hosts[1]
            self.hms_host = hosts[2]
            self.hs2_host = hosts[3]
        # without mgmt node
        else:
            if host_num == 1:
                self.ap_host = self.es_host = self.ho_host = self.sm_host = self.jt_host = \
                self.nn_host = self.hm_host = self.snn_host = self.hms_host = self.hs2_host = hosts[0]
            elif host_num > 1:
                # nn, snn not on same node
                tmp_hosts = hosts[:]
                self.nn_host = choice(tmp_hosts)
                tmp_hosts.remove(self.nn_host)
                self.snn_host = choice(tmp_hosts)
                self.hm_host = choice(tmp_hosts)
                self.jt_host = choice(hosts)
                self.hms_host = choice(hosts)
                self.hs2_host = choice(hosts)
                # cm
                self.ap_host = choice(hosts)
                self.es_host = choice(hosts)
                self.ho_host = choice(hosts)
                self.sm_host = choice(hosts)

            self.dn_hosts = self.rs_hosts = self.tt_hosts = hosts

        self.zk_hosts = hosts[-zk_num:]

    def _get_host_allocate(self):
        roles = self.cfg.get_roles()
        # auto set if no role config found
        if not roles:
            self._auto_allocate(self.host_list)
            return

        valid_roles = [
            'DN', 'RS', 'ZK', 'HM', 'NN', 'SNN', 'AP', 'ES', 'SM', 'HO', 'TT',
            'JT', 'HMS', 'HS2'
        ]
        role_host = defaultdict(list)

        for item in roles:
            for role in item[1]:
                role = role.strip()
                if role not in valid_roles: err('Incorrect role config')
                role_host[role].append(item[0])

        # cdh
        self.nn_host = role_host['NN'][0]
        self.snn_host = role_host['SNN'][0]
        self.hm_host = role_host['HM'][0]
        self.jt_host = role_host['JT'][0]
        self.hms_host = role_host['HMS'][0]
        self.hs2_host = role_host['HS2'][0]
        self.tt_hosts = role_host['TT']
        self.zk_hosts = role_host['ZK']
        self.dn_hosts = role_host['DN']
        self.rs_hosts = role_host['RS']
        # cm
        self.ap_host = role_host['AP'][0]
        self.es_host = role_host['ES'][0]
        self.ho_host = role_host['HO'][0]
        self.sm_host = role_host['SM'][0]

    def setup_cms(self):
        try:
            self.cm.delete_mgmt_service()
        except:
            pass

        # create the management service
        try:
            mgmt = self.cm.create_mgmt_service(ApiServiceSetupInfo())
            mgmt.create_role('AlertPublisher', "ALERTPUBLISHER", self.ap_host)
            mgmt.create_role('EventServer', "EVENTSERVER", self.es_host)
            mgmt.create_role('HostMonitor', "HOSTMONITOR", self.ho_host)
            mgmt.create_role('ServiceMonitor', "SERVICEMONITOR", self.sm_host)
            ok('Cloudera management service created successfully.')
        except ApiException:
            info('Cloudera management service had already been created.')

    def setup_parcel(self):
        parcels_list = []
        i = 1
        for p in self.cluster.get_all_parcels():
            if p.stage == 'AVAILABLE_REMOTELY': continue
            elif p.stage == 'ACTIVATED':
                info('Parcel [%s] has already been activated' % p.version)
                return
            else:
                print '\t' + str(i) + ': ' + p.product + ' ' + p.version
                i += 1
                parcels_list.append(p)

        if len(parcels_list) == 0:
            err('No downloaded ' + self.cdh_version + ' parcel found!')
        elif len(parcels_list) > 1:
            index = raw_input('Input parcel number:')
            if not index.isdigit():
                err('Error index, must be a number')
            cdh_parcel = parcels_list[int(index) - 1]
        else:
            cdh_parcel = parcels_list[0]

    #  # download the parcel
    #  print "Starting parcel download. This might take a while."
    #  cmd = cdh_parcel.start_download()
    #  if cmd.success != True:
    #      print "Parcel download failed!"
    #      exit(0)

    #  # make sure the download finishes
    #  while cdh_parcel.stage != 'DOWNLOADED':
    #  sleep(5)
    #      cdh_parcel = self.cluster.get_parcel(cdh_parcel.product, cdh_parcel.version)

    #  print cdh_parcel.product + ' ' + cdh_parcel.version + " downloaded"

        # distribute the parcel
        info('Starting parcel distribution. This might take a while.')
        cmd = cdh_parcel.start_distribution()
        i = 0
        while cmd.success == None:
            i += 1
            sleep(5)
            cmd = cmd.fetch()
            s = '.' * i
            print '\r%s' % s,
            sys.stdout.flush()
        if cmd.success != True:
            err('Parcel distribution failed!')

        # make sure the distribution finishes
        while cdh_parcel.stage != "DISTRIBUTED":
            sleep(5)
            cdh_parcel = self.cluster.get_parcel(cdh_parcel.product,
                                                 cdh_parcel.version)

        ok(cdh_parcel.product + ' ' + cdh_parcel.version + ' distributed')

        # activate the parcel
        cmd = cdh_parcel.activate()
        if cmd.success != True:
            err('Parcel activation failed!')

        # make sure the activation finishes
        while cdh_parcel.stage != "ACTIVATED":
            sleep(5)
            cdh_parcel = self.cluster.get_parcel(cdh_parcel.product,
                                                 cdh_parcel.version)

        ok(cdh_parcel.product + ' ' + cdh_parcel.version + ' activated')

    def _create_service(self, sdata):
        try:
            self.cluster.get_service(sdata['sname'])
            info('Service %s had already been configured' % sdata['sname'])
        except ApiException:
            service = self.cluster.create_service(sdata['sname'],
                                                  sdata['stype'])
            ok('Service %s had been created successfully' % sdata['sname'])
            for role in sdata['roles']:
                if role.has_key('rhost'):
                    service.create_role(role['rname'], role['rtype'],
                                        role['rhost'])
                elif role.has_key('rhosts'):
                    rid = 0
                    for host in role['rhosts']:
                        rid += 1
                        service.create_role(role['rname'] + '-' + str(rid),
                                            role['rtype'], host)

    def setup_cdh(self):
        service_data = [{
            'sname': 'hdfs',
            'stype': 'HDFS',
            'roles': [{
                'rname': 'hdfs-namenode',
                'rtype': 'NAMENODE',
                'rhost': self.nn_host
            }, {
                'rname': 'hdfs-secondarynamenode',
                'rtype': 'SECONDARYNAMENODE',
                'rhost': self.snn_host
            }, {
                'rname': 'hdfs-datanode',
                'rtype': 'DATANODE',
                'rhosts': self.dn_hosts
            }]
        }, {
            'sname': 'zookeeper',
            'stype': 'ZOOKEEPER',
            'roles': [{
                'rname': 'zookeeper',
                'rtype': 'SERVER',
                'rhosts': self.zk_hosts
            }]
        }, {
            'sname': 'hbase',
            'stype': 'HBASE',
            'roles': [{
                'rname': 'hbase-master',
                'rtype': 'MASTER',
                'rhost': self.hm_host
            }, {
                'rname': 'hdfs-regionserver',
                'rtype': 'REGIONSERVER',
                'rhosts': self.rs_hosts
            }]
        }, {
            'sname': 'hive',
            'stype': 'HIVE',
            'roles': [{
                'rname': 'hive-metastore',
                'rtype': 'HIVEMETASTORE',
                'rhost': self.hms_host
            }, {
                'rname': 'hive-server2',
                'rtype': 'HIVESERVER2',
                'rhost': self.hs2_host
            }, {
                'rname': 'hive-gateway',
                'rtype': 'GATEWAY',
                'rhosts': self.dn_hosts
            }]
        }, {
            'sname': 'mapreduce',
            'stype': 'MAPREDUCE',
            'roles': [{
                'rname': 'mapreduce-jobtracker',
                'rtype': 'JOBTRACKER',
                'rhost': self.jt_host
            }, {
                'rname': 'mapreduce-tasktracker',
                'rtype': 'TASKTRACKER',
                'rhosts': self.tt_hosts
            }]
        }]

        for sdata in service_data:
            self._create_service(sdata)

        # additional config for hive
        try:
            hive_service = self.cluster.get_service('hive')
            hive_metastore_host = self.cm_host  # should be same as cm's host, FQDN
            hive_metastore_name = 'hive'
            hive_metastore_password = '******'
            hive_metastore_database_port = '7432'
            hive_metastore_database_type = 'postgresql'
            hive_config = { 'hive_metastore_database_host' : hive_metastore_host, \
                            'hive_metastore_database_name' : hive_metastore_name, \
                            'hive_metastore_database_password' : hive_metastore_password, \
                            'hive_metastore_database_port' : hive_metastore_database_port, \
                            'hive_metastore_database_type' : hive_metastore_database_type }
            hive_service.update_config(hive_config)
            ok('Additional hive configs had been updated')
        except ApiException as e:
            err(e.message)

        # use auto configure for *-site.xml configs
        try:
            self.cluster.auto_configure()
        except ApiException as e:
            err(e.message)

    def start_cms(self):
        # start the management service
        info('Starting cloudera management service...')
        cms = self.cm.get_service()
        cms.start().wait()
        ok('Cloudera management service started successfully')

    def start_cdh(self):
        info('Executing first run command. This might take a while.')
        cmd = self.cluster.first_run()

        while cmd.success == None:
            cmd = cmd.fetch()
            sleep(1)

        if cmd.success != True:
            err('The first run command failed: ' + cmd.resultMessage)

        ok('First run successfully executed. Your cluster has been set up!')
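
# A possible driver for the Deploy class above (a sketch; the credentials are
# placeholders, and ParseConfig/err/info/ok are assumed to come from this module).
if __name__ == '__main__':
    deploy = Deploy(cm_port='7180', cm_user='admin', cm_passwd='admin',
                    cluster_name='cluster1')
    deploy.setup_cms()
    deploy.setup_parcel()
    deploy.setup_cdh()
    deploy.start_cms()
    deploy.start_cdh()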
Ejemplo n.º 51
0
def main(passed_username, passed_password, passed_database):

    PEER_NAME = 'PRODUCTION'                        # Previously
    TARGET_CLUSTER_NAME = 'DEV'                     #     defined
    SOURCE_CLUSTER_NAME = 'cluster'                 #       at Experian

    cm_host = 'br1andvhmn11.passporthealth.com'


    cm_client.configuration.username = passed_username

    # Ensure that password is quoted
    cm_client.configuration.password = "******" + passed_password + "'"

    sourceDatabase = passed_database

    # Setup authentication for SSL
    cm_client.configuration.verify_ssl = True
    cm_client.configuration.ssl_ca_cert = '/opt/cloudera/security/pki/x509/truststore.pem'

    # Create an instance of the API class
    api_host = 'https://br1andvhmn11.passporthealth.com'
    port = '7183'
    api_version = 'v30'

    impala_host = 'br1anprhsn02.passporthealth.com'

    # Construct base URL for API
    # http://cmhost:7180/api/v30
    api_url = api_host + ':' + port + '/api/' + api_version
    api_client = cm_client.ApiClient(api_url)
    cluster_api_instance = cm_client.ClustersResourceApi(api_client)

    # Lists all known clusters.
    api_response = cluster_api_instance.read_clusters(view='SUMMARY')
    for cluster in api_response.items:
        print cluster.name, "-", cluster.full_version

        services_api_instance = cm_client.ServicesResourceApi(api_client)
        services = services_api_instance.read_services(cluster.name, view='FULL')
        for service in services.items:
    #        print service.display_name, "-", service.type
            if service.type == 'HIVE':
                targetHive = service
                targetCluster = cluster

                print targetHive.name, targetHive.service_state, targetHive.health_summary

                for health_check in targetHive.health_checks:
                    print health_check.name, "---", health_check.summary

    #		print "Source database = " + sourceDatabase

    ###show_statement = "'show tables in " + sourceDatabase +"'"
    ###streamOperand = "impala-shell -i " + impala_host + " -d default -k --ssl --ca_cert=/opt/cloudera/security/pki/x509/truststore.pem -q " + show_statement
    ###stream=os.popen(streamOperand)
    ###
    ###output=stream.readlines()
    ###lineno =0
    ###numtables = 0
    ###tablenames = []
    ###for line in output:
    ###    if lineno <= 2:     # skip heading lines
    ###        pass
    ###    elif line[0:3] == "+--":                    # skip last line
    ###        pass
    ###    else:                                       # strip out tablename
    ###        name = line[2:]
    ###        blank = name.index(' ')
    ###        tablenames.append(name[0:blank])
    ###        numtables +=1
    ###    lineno +=1
    ###print str(numtables) + " tables in database " + sourceDatabase
    ###for table in tablenames:
    ###	print table


    tablenames = []
    tablenames.append("test")
    tablenames.append("test2")


    api_root = ApiResource(cm_host, username=passed_username, password=passed_password,  use_tls=True)

    PEER_NAME = 'PRODUCTION'
    SOURCE_HDFS_NAME = 'hdfs'
    TARGET_HDFS_NAME = 'hdfs'
    SOURCE_HIVE_NAME = 'hive'
    TARGET_HIVE_NAME = 'hive'
    SOURCE_CLUSTER_NAME = 'cluster'
    TARGET_CLUSTER_NAME = 'DEV'
    TARGET_YARN_SERVICE = 'yarn'




    # Setup for Hive replication
    hive =  api_root.get_cluster(TARGET_CLUSTER_NAME).get_service(TARGET_HIVE_NAME)
    hive_args = ApiHiveReplicationArguments(None)
    hdfs_args = ApiHdfsReplicationArguments(None)                   # Needed for replicating table data stored in HDFS
    hive_args.sourceService = ApiServiceRef(None, peerName=PEER_NAME, clusterName=SOURCE_CLUSTER_NAME, serviceName=SOURCE_HIVE_NAME)

    # Define tables to replicate
    table_filters = []
    table = ApiHiveTable(None)

    for tab in tablenames:
        table.database = (passed_database)
        table.tableName = (tab)
        table_filters = []
        table_filters.append(table)
        print "Replicating " + passed_database + "." + tab

        hive_args.tableFilters = table_filters
        hive_args.force = True                                          # Overwrite existing tables
        hive_args.replicateData = True                                  # Replicate table data stored in HDFS
        hdfs_args.skipChecksumChecks = True
        hdfs_args.skipListingChecksumChecks = True
        hdfs_args.preserveBlockSize = True
        hdfs_args.preserveReplicationCount = True
        hdfs_args.preservePermissions = True

        # Define HDFS portion of the Hive replication as needed
        hdfs_args.destinationPath = '/user/bob.marshall/repltest'       # Argument? Path relative to servicename?
        hdfs_args.mapreduceServiceName = TARGET_YARN_SERVICE
        hdfs_args.userName = passed_username
        hdfs_args.sourceUser = passed_username

        hive_args.hdfsArguments = hdfs_args

        start = datetime.datetime.now()
        end = start + datetime.timedelta(days=1)
        interval = "DAY"
        numinterval = 1
        pause = True

        print "Creating Hive Replication Schedule"
        schedule = hive.create_replication_schedule(start, end, interval, numinterval, pause, hive_args)
        print "Starting Hive Replication"
        cmd = hive.trigger_replication_schedule(schedule.id)
        print "Waiting for completion"
        cmd = cmd.wait()
        print "Getting result"
        result = hive.get_replication_schedule(schedule.id).history[0].hiveResult
        print result

        print "Cleanup... Remove Hive replication schedule"
        sch = hive.delete_replication_schedule(schedule.id)
        print sch

    exit(0)

#scheds = hive.get_replication_schedules()
#sch = hive.delete_replication_schedule(162)


    # Setup for HDFS replication.
    # NOTE: this block is kept inside main() so the module stays importable; as
    # written it sits after the exit(0) above, so it only runs if that early exit
    # is removed.
    hdfs = api_root.get_cluster(TARGET_CLUSTER_NAME).get_service(TARGET_HDFS_NAME)
    hdfs_args = ApiHdfsReplicationArguments(None)
    hdfs_args.sourceService = ApiServiceRef(None, peerName=PEER_NAME, clusterName=SOURCE_CLUSTER_NAME, serviceName=SOURCE_HDFS_NAME)
    hdfs_args.sourcePath = '/user/bob.marshall/repltest'
    hdfs_args.destinationPath = '/user/bob.marshall/repltest'
    hdfs_args.mapreduceServiceName = TARGET_YARN_SERVICE
    hdfs_args.userName = passed_username
    hdfs_args.sourceUser = passed_username
    hdfs_args.preserveBlockSize = True
    hdfs_args.preserveReplicationCount = True
    hdfs_args.preservePermissions = True
    hdfs_args.skipChecksumChecks = True
    hdfs_args.skipListingChecksumChecks = True
    start = datetime.datetime.now()
    end = start + datetime.timedelta(days=1)
    interval = "DAY"
    numinterval = 1
    pause = True
    #schedule = hdfs.create_replication_schedule(start, end, interval, interval, pause, hdfs_args)
    print "Creating HDFS Replication Schedule"
    schedule = hdfs.create_replication_schedule(start, end, "DAY", 1, True, hdfs_args)
    print "Starting HDFS Replication"
    cmd = hdfs.trigger_replication_schedule(schedule.id)
    print "Waiting for completion"
    cmd = cmd.wait()
    print "Getting result"
    result = hdfs.get_replication_schedule(schedule.id).history[0].hdfsResult
    print result

    print "Cleanup... Remove HDFS replication schedule"
    sch = hdfs.delete_replication_schedule(schedule.id)
    print sch

    #scheds = hdfs.get_replication_schedules()
    #sch = hdfs.delete_replication_schedule(27)

if __name__ == "__main__":

    parser = argparse.ArgumentParser(description='Perform BDR jobs while getting around BDR limitations.')
    parser.add_argument("username")
    parser.add_argument("password")
    parser.add_argument("database")
    args = parser.parse_args()

    main(args.username, args.password, args.database)
Ejemplo n.º 52
0
class ClouderaManagerDeployment(object):
    def __init__(self, cm_server_address, cm_server_port=DEFAULT_CM_PORT,
                 username=DEFAULT_CM_USERNAME, password=DEFAULT_CM_PASSWORD):
        self.cm_server_address = cm_server_address
        self.cm_server_port = cm_server_port
        self.username = username
        self.password = password

    def setup_api_resources(self):
        self.api = ApiResource(server_host=self.cm_server_address, server_port=self.cm_server_port,
                               username=self.username, password=self.password,
                               version=self._get_api_version())

        self.cm = self.api.get_cloudera_manager()
        self.cluster = self.api.get_cluster('Cluster 1 (clusterdock)')

    def prep_for_start(self):
        pass

    def validate_services_started(self, timeout_min=10, healthy_time_threshold_sec=30):
        start_validating_time = time()
        healthy_time = None

        logger.info('Beginning service health validation...')
        while healthy_time is None or (time() - healthy_time < healthy_time_threshold_sec):
            if (time() - start_validating_time < timeout_min * 60):
                all_services = list(self.cluster.get_all_services()) + [self.cm.get_service()]
                at_fault_services = list()
                for service in all_services:
                    if (service.serviceState != "NA" and service.serviceState != "STARTED"):
                        at_fault_services.append([service.name, "NOT STARTED"])
                    elif (service.serviceState != "NA" and service.healthSummary != "GOOD"):
                        checks = list()
                        for check in service.healthChecks:
                            if (check["summary"] not in ("GOOD", "DISABLED")):
                                checks.append(check["name"])
                        at_fault_services.append([service.name,
                                                 "Failed health checks: {0}".format(checks)])

                if not healthy_time or at_fault_services:
                    healthy_time = time() if not at_fault_services else None
                sleep(3)
            else:
                raise Exception(("Timed out after waiting {0} minutes for services to start "
                                "(at fault: {1}).").format(timeout_min, at_fault_services))
        logger.info("Validated that all services started (time: %.2f s).",
                    time() - start_validating_time)

    def add_hosts_to_cluster(self, secondary_node_fqdn, all_fqdns):
        cm_utils.add_hosts_to_cluster(api=self.api, cluster=self.cluster,
                                      secondary_node_fqdn=secondary_node_fqdn,
                                      all_fqdns=all_fqdns)

    def update_hive_metastore_namenodes(self):
        for service in self.cluster.get_all_services():
            if service.type == 'HIVE':
                logger.info('Updating NameNode references in Hive metastore...')
                update_metastore_namenodes_cmd = service.update_metastore_namenodes().wait()
                if not update_metastore_namenodes_cmd.success:
                    logger.warning(("Failed to update NameNode references in Hive metastore "
                                    "(command returned %s)."), update_metastore_namenodes_cmd)

    def update_database_configs(self):
        cm_utils.update_database_configs(api=self.api, cluster=self.cluster)

    def _get_api_version(self):
        api_version_response = requests.get(
            "http://{0}:{1}/api/version".format(self.cm_server_address,
                                                self.cm_server_port),
            auth=(self.username, self.password))
        api_version_response.raise_for_status()
        api_version = api_version_response.content
        if 'v' not in api_version:
            raise Exception("/api/version returned unexpected result (%s).", api_version)
        else:
            logger.info("Detected CM API %s.", api_version)
            return api_version.strip('v')
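
# A brief usage sketch for the class above (the CM address is a placeholder; the
# cluster name is the one hard-coded in setup_api_resources()).
deployment = ClouderaManagerDeployment('cm.example.com')
deployment.setup_api_resources()
deployment.validate_services_started(timeout_min=10)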