def main(args):
    primary_node_image = "{0}/{1}/{2}:cdh-cm-primary-{3}".format(
        args.registry, args.clusterdock_namespace, args.image_name, args.version_string)
    secondary_node_image = "{0}/{1}/{2}:cdh-cm-secondary-{3}".format(
        args.registry, args.clusterdock_namespace, args.image_name, args.version_string)
    edge_node_image = "{0}/{1}/{2}:cdh-cm-edge-{3}".format(
        args.registry, args.clusterdock_namespace, args.image_name, args.version_string)

    # Docker's API for healthcheck uses units of nanoseconds. Define a constant
    # to make this more readable.
    SECONDS = 1000000000
    cm_server_healthcheck = {
        'test': 'curl --silent --output /dev/null 127.0.0.1:{}'.format(CM_PORT),
        'interval': 1 * SECONDS,
        'timeout': 1 * SECONDS,
        'retries': 1,
        'start_period': 30 * SECONDS
    }
    primary_node = Node(hostname=args.primary_node[0],
                        group='primary',
                        image=primary_node_image,
                        ports=[{CM_PORT: CM_PORT}],
                        healthcheck=cm_server_healthcheck)
    secondary_nodes = [Node(hostname=hostname, group='secondary', image=secondary_node_image)
                       for hostname in args.secondary_nodes]
    edge_nodes = [Node(hostname=hostname, group='edge', image=edge_node_image)
                  for hostname in args.edge_nodes]

    all_nodes = [primary_node] + secondary_nodes + edge_nodes
    cluster = Cluster(*all_nodes)
    cluster.primary_node = primary_node

    secondary_node_group = NodeGroup(secondary_nodes)
    edge_node_group = NodeGroup(edge_nodes)

    cluster.start(args.network)

    filesystem_fix_commands = [
        'cp {0} {0}.1; umount {0}; mv -f {0}.1 {0}'.format(file_)
        for file_ in ['/etc/hosts', '/etc/resolv.conf', '/etc/hostname', '/etc/localtime']
    ]
    cluster.execute("bash -c '{}'".format('; '.join(filesystem_fix_commands)))

    # Use bsdtar instead of GNU tar because it works better inside Docker containers.
    cluster.execute('ln -fs /usr/bin/bsdtar /bin/tar')

    _configure_cm_agents(cluster)

    if args.change_hostfile:
        update_hosts_file(cluster)

    # The CDH topology uses two pre-built images ('primary' and 'secondary'). If a cluster
    # larger than two nodes is started, the extra secondary nodes need stale state removed
    # to prevent duplicate agent heartbeats and the like.
    if len(secondary_nodes) > 1:
        _remove_files(nodes=secondary_nodes[1:],
                      files=['/var/lib/cloudera-scm-agent/uuid', '/dfs*/dn/current/*'])

    logger.info('Configuring Kerberos ...')
    cluster.primary_node.execute('/root/configure-kerberos.sh', quiet=True)
    cluster.primary_node.execute('service krb5kdc start', quiet=True)
    cluster.primary_node.execute('service kadmin start', quiet=True)

    logger.info('Restarting Cloudera Manager agents ...')
    # _restart_cm_agents(cluster)

    logger.info('Waiting for Cloudera Manager server to come online ...')
    _wait_for_cm_server(primary_node)

    # Docker for Mac exposes ports that can be accessed only with ``localhost:<port>``, so
    # use that instead of the hostname if the Docker host's name is ``moby``.
    hostname = 'localhost' if client.info().get('Name') == 'moby' else socket.gethostname()
    port = primary_node.host_ports.get(CM_PORT)
    server_url = 'http://{}:{}'.format(hostname, port)
    logger.info('Cloudera Manager server is now reachable at %s', server_url)

    # The work we need to do through CM itself begins here ...
    deployment = ClouderaManagerDeployment(server_url)
    deployment.stop_cm_service()
    time.sleep(10)

    logger.info('Starting krb5kdc and kadmin ...')
    cluster.primary_node.execute('service krb5kdc start', quiet=True)
    cluster.primary_node.execute('service kadmin start', quiet=True)

    logger.info('Regenerating keytabs ...')
    regenerate_keytabs(cluster, primary_node, deployment)

    logger.info('Adding hosts to cluster ...')
    # Add all CM hosts to the cluster (i.e. only new hosts that weren't part of the original
    # images).
    all_host_ids = {}
    for host in deployment.get_all_hosts():
        all_host_ids[host['hostId']] = host['hostname']
        for node in cluster:
            if node.fqdn == host['hostname']:
                node.host_id = host['hostId']
                break
        else:
            raise Exception('Could not find a cluster node matching CM host '
                            'with hostname {}.'.format(host['hostname']))
    cluster_host_ids = {host['hostId']
                        for host in deployment.get_cluster_hosts(
                            cluster_name=DEFAULT_CLUSTER_NAME)}
    host_ids_to_add = set(all_host_ids.keys()) - cluster_host_ids
    if host_ids_to_add:
        logger.debug('Adding %s to cluster %s ...',
                     'host{} ({})'.format('s' if len(host_ids_to_add) > 1 else '',
                                          ', '.join(all_host_ids[host_id]
                                                    for host_id in host_ids_to_add)),
                     DEFAULT_CLUSTER_NAME)
        deployment.add_cluster_hosts(cluster_name=DEFAULT_CLUSTER_NAME,
                                     host_ids=host_ids_to_add)

    _wait_for_activated_cdh_parcel(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME)

    # Create and apply host templates.
    deployment.create_host_template(cluster_name=DEFAULT_CLUSTER_NAME,
                                    host_template_name='secondary',
                                    role_config_group_names=['hdfs-DATANODE-BASE',
                                                             'hbase-REGIONSERVER-BASE',
                                                             'yarn-NODEMANAGER-BASE'])
    deployment.create_host_template(cluster_name=DEFAULT_CLUSTER_NAME,
                                    host_template_name='edgenode',
                                    role_config_group_names=['hive-GATEWAY-BASE',
                                                             'hbase-GATEWAY-BASE',
                                                             'hdfs-GATEWAY-BASE',
                                                             'spark_on_yarn-GATEWAY-BASE'])
    deployment.apply_host_template(cluster_name=DEFAULT_CLUSTER_NAME,
                                   host_template_name='secondary',
                                   start_roles=False,
                                   host_ids=host_ids_to_add)
    deployment.apply_host_template(cluster_name=DEFAULT_CLUSTER_NAME,
                                   host_template_name='edgenode',
                                   start_roles=False,
                                   host_ids=host_ids_to_add)

    logger.info('Updating database configurations ...')
    _update_database_configs(deployment=deployment,
                             cluster_name=DEFAULT_CLUSTER_NAME,
                             primary_node=primary_node)
    # deployment.update_database_configs()
    # deployment.update_hive_metastore_namenodes()

    logger.info('Updating KDC config ...')
    deployment.update_cm_config({'SECURITY_REALM': 'CLOUDERA',
                                 'KDC_HOST': 'node-1.cluster',
                                 'KRB_MANAGE_KRB5_CONF': 'true'})

    deployment.update_service_config(service_name='hbase',
                                     cluster_name=DEFAULT_CLUSTER_NAME,
                                     configs={'hbase_superuser': '******'})
    deployment.update_service_role_config_group_config(
        service_name='hive',
        cluster_name=DEFAULT_CLUSTER_NAME,
        role_config_group_name='hive-HIVESERVER2-BASE',
        configs={'hiveserver2_webui_port': '10009'})

    logger.info('Importing admin credentials ...')
    cluster.primary_node.execute(
        'curl -XPOST -u admin:admin '
        'http://{0}:{1}/api/v14/cm/commands/importAdminCredentials'
        '?username=cloudera-scm/admin@CLOUDERA&password=cloudera'.format(primary_node.fqdn,
                                                                         CM_PORT),
        quiet=True)

    logger.info('Deploying cluster client config ...')
    deployment.deploy_cluster_client_config(cluster_name=DEFAULT_CLUSTER_NAME)

    logger.info('Configuring cluster for Kerberos ...')
    cluster.primary_node.execute(
        'curl -XPOST -u admin:admin '
        'http://{0}:{1}/api/v14/cm/commands/configureForKerberos '
        "--data 'clustername={2}'".format(primary_node.fqdn, CM_PORT, DEFAULT_CLUSTER_NAME),
        quiet=True)

    logger.info('Creating keytab files ...')
    cluster.execute('/root/create-keytab.sh', quiet=True)
    logger.info('Deploying client config ...')
    _deploy_client_config(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME)

    if not args.dont_start_cluster:
        logger.info('Starting cluster services ...')
        _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                               service_name='zookeeper', command='start')
        _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                               service_name='hdfs', command='start')
        if not args.skip_accumulo:
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name='accumulo16', command='CreateHdfsDirCommand')
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name='accumulo16',
                                   command='CreateAccumuloUserDirCommand')
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name='accumulo16',
                                   command='AccumuloInitServiceCommand')
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name='accumulo16', command='start')
        if not args.skip_yarn:
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name='yarn', command='start')
        if not args.skip_hbase:
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name='hbase', command='start')
        if not args.skip_flume:
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name='flume', command='start')
        if not args.skip_spark:
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name='spark_on_yarn', command='start')
        if not args.skip_sqoop:
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name='sqoop', command='start')
        if not args.skip_hive:
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name='hive', command='start')
        if not args.skip_oozie:
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name='oozie', command='start')
        if not args.skip_hue:
            _start_service_command(deployment=deployment, cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name='hue', command='start')

        logger.info('Starting CM services ...')
        _start_cm_service(deployment=deployment)

        logger.info('Setting up HDFS home directory ...')
        cluster.primary_node.execute('kinit -kt /var/run/cloudera-scm-agent/process/'
                                     '*-hdfs-NAMENODE/hdfs.keytab hdfs/node-1.cluster@CLOUDERA',
                                     quiet=True)
        cluster.primary_node.execute('hadoop fs -mkdir /user/cloudera-scm', quiet=True)
        cluster.primary_node.execute('hadoop fs -chown cloudera-scm:cloudera-scm '
                                     '/user/cloudera-scm', quiet=True)

        logger.info('Running kinit as cloudera-scm/admin ...')
        cluster.execute('kinit -kt /root/cloudera-scm.keytab cloudera-scm/admin', quiet=True)

        logger.info('Executing post-run script ...')
        secondary_node_group.execute('/root/post_run.sh')
        edge_node_group.execute('/root/post_run.sh')
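
# NOTE: the helper below is NOT part of the topology. It is a minimal illustrative sketch
# of what _wait_for_cm_server (called in main() above) could look like, assuming it simply
# polls the Cloudera Manager web port until it answers, mirroring the container healthcheck
# defined in main(). The name, signature, and behavior are assumptions; the real helper in
# this repository may differ.
def _wait_for_cm_server_sketch(primary_node, timeout=300, time_between_checks=5):
    # Local imports keep this illustrative helper self-contained.
    import time

    import requests

    url = 'http://{}:{}'.format(primary_node.fqdn, CM_PORT)
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            # Any HTTP response (even a redirect to the login page) means CM is up.
            requests.get(url, timeout=5)
            return
        except requests.exceptions.RequestException:
            time.sleep(time_between_checks)
    raise Exception('Timed out after {} seconds waiting for Cloudera Manager server '
                    'at {} to come online.'.format(timeout, url))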
def main(args):
    if args.license_url and not args.license_credentials:
        raise Exception('--license-credentials is a required argument if '
                        '--license-url is provided.')

    image_prefix = '{}/{}/clusterdock:mapr{}'.format(args.registry,
                                                     args.namespace or DEFAULT_NAMESPACE,
                                                     args.mapr_version)
    if args.mep_version:
        image_prefix = '{}_mep{}'.format(image_prefix, args.mep_version)
    primary_node_image = '{}_{}'.format(image_prefix, 'primary-node')
    secondary_node_image = '{}_{}'.format(image_prefix, 'secondary-node')

    node_disks = yaml.safe_load(args.node_disks)

    # MapR-FS needs each fileserver node to have a disk allocated for it, so fail fast if
    # the node disks map is missing any nodes.
    if set(args.primary_node + args.secondary_nodes) != set(node_disks):
        raise Exception('Not all nodes are accounted for in the --node-disks dictionary')

    primary_node = Node(hostname=args.primary_node[0],
                        group='primary',
                        image=primary_node_image,
                        ports=[{MCS_SERVER_PORT: MCS_SERVER_PORT}
                               if args.predictable
                               else MCS_SERVER_PORT],
                        devices=node_disks.get(args.primary_node[0]),
                        # A secure cluster needs the ticket to execute the rest of the
                        # commands after cluster start.
                        environment=['MAPR_TICKETFILE_LOCATION=/opt/mapr/conf/mapruserticket']
                        if args.secure else [])

    secondary_nodes = [Node(hostname=hostname,
                            group='secondary',
                            image=secondary_node_image,
                            devices=node_disks.get(hostname))
                       for hostname in args.secondary_nodes]

    cluster = Cluster(primary_node, *secondary_nodes)

    if args.secure:
        secure_config_host_dir = os.path.expanduser(args.secure_config_directory)
        volumes = [{secure_config_host_dir: SECURE_CONFIG_CONTAINER_DIR}]
        for node in cluster.nodes:
            node.volumes.extend(volumes)

    # MapR versions 6.0.0 onwards use CentOS 7, which needs the following settings.
    mapr_version_tuple = tuple(int(i) for i in args.mapr_version.split('.'))
    if mapr_version_tuple >= EARLIEST_MAPR_VERSION_WITH_LICENSE_AND_CENTOS_7:
        for node in cluster.nodes:
            node.volumes.append({'/sys/fs/cgroup': '/sys/fs/cgroup'})
            temp_dir_name = tempfile.mkdtemp()
            logger.debug('Created temporary directory %s', temp_dir_name)
            node.volumes.append({temp_dir_name: '/run'})

    cluster.primary_node = primary_node
    cluster.start(args.network, pull_images=args.always_pull)

    logger.info('Generating new UUIDs ...')
    cluster.execute('/opt/mapr/server/mruuidgen > /opt/mapr/hostid')

    if not args.secure:
        logger.info('Configuring the cluster ...')
        for node in cluster:
            configure_command = ('/opt/mapr/server/configure.sh -C {0} -Z {0} -RM {0} -HS {0} '
                                 '-u mapr -g mapr -D {1}'.format(
                                     primary_node.fqdn,
                                     ','.join(node_disks.get(node.hostname))))
            node.execute("bash -c '{}'".format(configure_command))
    else:
        logger.info('Configuring native security for the cluster ...')
        configure_command = ('/opt/mapr/server/configure.sh -secure -genkeys -C {0} -Z {0} '
                             '-RM {0} -HS {0} -u mapr -g mapr -D {1}'.format(
                                 primary_node.fqdn,
                                 ','.join(node_disks.get(primary_node.hostname))))
        source_files = ['{}/{}'.format(MAPR_CONFIG_DIR, file) for file in SECURE_FILES]
        commands = [configure_command,
                    'chmod 600 {}/{}'.format(MAPR_CONFIG_DIR, SSL_KEYSTORE_FILE),
                    'cp -f {src} {dest_dir}'.format(src=' '.join(source_files),
                                                    dest_dir=SECURE_CONFIG_CONTAINER_DIR)]
        primary_node.execute(' && '.join(commands))

        for node in secondary_nodes:
            source_files = ['{}/{}'.format(SECURE_CONFIG_CONTAINER_DIR, file)
                            for file in SECURE_FILES]
            configure_command = ('/opt/mapr/server/configure.sh -secure -C {0} -Z {0} '
                                 '-RM {0} -HS {0} -u mapr -g mapr -D {1}'.format(
                                     primary_node.fqdn,
                                     ','.join(node_disks.get(node.hostname))))
            commands = ['cp -f {src} {dest_dir}'.format(src=' '.join(source_files),
                                                        dest_dir=MAPR_CONFIG_DIR),
                        configure_command]
            node.execute(' && '.join(commands))

    logger.info('Waiting for MapR Control System server to come online ...')

    def condition(address, port):
        return socket().connect_ex((address, port)) == 0

    def success(time):
        logger.info('MapR Control System server is online after %s seconds.', time)

    def failure(timeout):
        raise TimeoutError('Timed out after {} seconds waiting '
                           'for MapR Control System server to come online.'.format(timeout))
    wait_for_condition(condition=condition,
                       condition_args=[primary_node.ip_address, MCS_SERVER_PORT],
                       time_between_checks=3,
                       timeout=180,
                       success=success,
                       failure=failure)
    mcs_server_host_port = primary_node.host_ports.get(MCS_SERVER_PORT)

    logger.info('Creating /apps/spark directory on %s ...', primary_node.hostname)
    spark_directory_command = ['hadoop fs -mkdir -p /apps/spark',
                               'hadoop fs -chmod 777 /apps/spark']
    primary_node.execute("bash -c '{}'".format('; '.join(spark_directory_command)))

    logger.info('Creating MapR sample Stream named /sample-stream on %s ...',
                primary_node.hostname)
    primary_node.execute('maprcli stream create -path /sample-stream '
                         '-produceperm p -consumeperm p -topicperm p')

    if (mapr_version_tuple >= EARLIEST_MAPR_VERSION_WITH_LICENSE_AND_CENTOS_7
            and args.license_url):
        license_commands = ['curl --user {} {} > /tmp/lic'.format(args.license_credentials,
                                                                  args.license_url),
                            '/opt/mapr/bin/maprcli license add -license /tmp/lic -is_file true',
                            'rm -rf /tmp/lic']
        logger.info('Applying license ...')
        primary_node.execute(' && '.join(license_commands))

    if not args.dont_register_gateway:
        logger.info('Registering gateway with the cluster ...')
        register_gateway_commands = ["cat /opt/mapr/conf/mapr-clusters.conf | "
                                     "egrep -o '^[^ ]* ' > /tmp/cluster-name",
                                     'maprcli cluster gateway set -dstcluster $(cat '
                                     '/tmp/cluster-name) -gateways {}'.format(primary_node.fqdn),
                                     'rm /tmp/cluster-name']
        primary_node.execute(' && '.join(register_gateway_commands))

    logger.info('MapR Control System server is now accessible at https://%s:%s',
                getfqdn(), mcs_server_host_port)
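
# NOTE: for reference only. wait_for_condition is imported from the clusterdock utilities
# in the real topology; the sketch below is an assumption that merely illustrates the
# calling convention used in main() above (a condition callable plus condition_args, and
# success/failure callbacks). It is not the actual implementation.
def wait_for_condition_sketch(condition, condition_args=None, condition_kwargs=None,
                              time_between_checks=1, timeout=60,
                              success=None, failure=None):
    # Local import keeps this illustrative helper self-contained.
    import time as _time

    start = _time.time()
    while _time.time() - start < timeout:
        if condition(*(condition_args or []), **(condition_kwargs or {})):
            if success:
                # The success callback defined in main() expects the elapsed seconds.
                success(time=int(_time.time() - start))
            return
        _time.sleep(time_between_checks)
    if failure:
        # The failure callback defined in main() raises TimeoutError itself.
        failure(timeout=timeout)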