Example #1
def main(args):
    image = '{}/{}/topology_nodebase:{}'.format(args.registry,
                                                args.namespace or DEFAULT_NAMESPACE,
                                                args.operating_system or DEFAULT_OPERATING_SYSTEM)

    if args.node_disks:
        node_disks = yaml.load(args.node_disks)
        logger.debug('Parsed node disks: %s.', node_disks)

    cluster = Cluster(*[Node(hostname=hostname, group='nodes', image=image,
                             devices=node_disks.get(hostname) if args.node_disks else None)
                        for hostname in args.nodes])
    cluster.start(args.network)
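The example above treats --node-disks as a YAML mapping of hostname to block devices. A minimal sketch of what a parsed value could look like (hostnames and device paths are invented for illustration; the sketch uses yaml.safe_load, while the example itself calls yaml.load):

import yaml

# Illustrative --node-disks value: one list of block devices per hostname.
node_disks_arg = '{node-1: [/dev/xvdb], node-2: [/dev/xvdc]}'
node_disks = yaml.safe_load(node_disks_arg)
assert node_disks == {'node-1': ['/dev/xvdb'], 'node-2': ['/dev/xvdc']}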
Example #2
def main(args):
    quiet = not args.verbose

    node_image = '{}/{}/topology_apache_pulsar:pulsar-{}'.format(args.registry,
                                                                 args.namespace or DEFAULT_NAMESPACE,
                                                                 args.pulsar_version)
    ports = [{WEB_SERVICE_PORT: WEB_SERVICE_PORT} if args.predictable else WEB_SERVICE_PORT,
             {WEB_SERVICE_TLS_PORT: WEB_SERVICE_TLS_PORT} if args.predictable else WEB_SERVICE_TLS_PORT,
             {BROKER_SERVICE_PORT: BROKER_SERVICE_PORT} if args.predictable else BROKER_SERVICE_PORT,
             {BROKER_SERVICE_TLS_PORT: BROKER_SERVICE_TLS_PORT} if args.predictable else BROKER_SERVICE_TLS_PORT]

    clusterdock_config_host_dir = os.path.realpath(os.path.expanduser(args.clusterdock_config_directory))
    volumes = [{clusterdock_config_host_dir: CLUSTERDOCK_CLIENT_CONTAINER_DIR}]

    proxy_node = Node(hostname=args.proxy_node_name,
                      group='proxy',
                      image=node_image,
                      ports=ports,
                      volumes=volumes)
    broker_nodes = [Node(hostname=hostname, group='broker', image=node_image, volumes=volumes)
                    for hostname in args.broker_nodes]
    zk_nodes = [Node(hostname=hostname, group='zookeeper', image=node_image, volumes=volumes)
                for hostname in args.zookeeper_nodes]
    nodes = [proxy_node] + broker_nodes + zk_nodes
    cluster = Cluster(*nodes)
    cluster.start(args.network)

    logger.info('Starting Pulsar cluster (%s) version %s ...', args.pulsar_cluster_name, args.pulsar_version)

    # zookeeper
    for idx, node in enumerate(zk_nodes, start=1):
        zookeeper_conf = node.get_file(ZOOKEEPER_CONF)
        zookeeper_properties = PropertiesFile.loads(zookeeper_conf)
        for srvidx, srvnode in enumerate(zk_nodes, start=1):
            zookeeper_properties['server.{}'.format(srvidx)] = '{}.{}:2888:3888'.format(srvnode.hostname,
                                                                                        cluster.network)
        node.put_file(ZOOKEEPER_CONF, PropertiesFile.dumps(zookeeper_properties))
        zookeeper_commands = [
            'mkdir -p {}/data/zookeeper'.format(PULSAR_HOME),
            'echo {} > {}/data/zookeeper/myid'.format(idx, PULSAR_HOME),
            '{}/bin/pulsar-daemon start zookeeper'.format(PULSAR_HOME)
        ]
        execute_node_command(node, ' && '.join(zookeeper_commands), quiet, 'Zookeeper start failed')
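    # At this point each ZooKeeper node's ZOOKEEPER_CONF file carries one server.N entry per
    # ensemble member, along the lines of (hostnames illustrative):
    #   server.1=zookeeper-1.<network>:2888:3888
    #   server.2=zookeeper-2.<network>:2888:3888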

    web_service_url = 'http://{}.{}:{}'.format(proxy_node.hostname, cluster.network, WEB_SERVICE_PORT)
    web_service_url_tls = 'https://{}.{}:{}'.format(proxy_node.hostname, cluster.network, WEB_SERVICE_TLS_PORT)
    broker_service_url = 'pulsar://{}.{}:{}'.format(proxy_node.hostname, cluster.network, BROKER_SERVICE_PORT)
    broker_service_url_tls = 'pulsar+ssl://{}.{}:{}'.format(proxy_node.hostname, cluster.network,
                                                            BROKER_SERVICE_TLS_PORT)

    init_cluster_cmd = ('{home}/bin/pulsar initialize-cluster-metadata'
                        ' --cluster {cluster_name}'
                        ' --zookeeper {zkhostname}.{network}:2181'
                        ' --configuration-store {zkhostname}.{network}:2181'
                        ' --web-service-url {web_service_url}'
                        ' --web-service-url-tls {web_service_url_tls}'
                        ' --broker-service-url {broker_service_url}'
                        ' --broker-service-url-tls {broker_service_url_tls}'
                        .format(home=PULSAR_HOME,
                                cluster_name=args.pulsar_cluster_name,
                                zkhostname=zk_nodes[0].hostname,
                                hostname=proxy_node.hostname,
                                network=cluster.network,
                                web_service_url=web_service_url,
                                web_service_url_tls=web_service_url_tls,
                                broker_service_url=broker_service_url,
                                broker_service_url_tls=broker_service_url_tls))
    execute_node_command(zk_nodes[0], init_cluster_cmd, quiet, 'Cluster initialization failed')

    zk_servers_conf = ','.join(['{}.{}:2181'.format(node.hostname, cluster.network) for node in zk_nodes])
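    # zk_servers_conf is a comma-separated ZooKeeper connect string, e.g. (hostnames illustrative):
    #   zookeeper-1.<network>:2181,zookeeper-2.<network>:2181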

    # bookkeepers
    for node in broker_nodes:
        bookkeeper_conf = node.get_file(BOOKKEEPER_CONF)
        bookkeeper_properties = PropertiesFile.loads(bookkeeper_conf)
        bookkeeper_properties['zkServers'] = zk_servers_conf
        node.put_file(BOOKKEEPER_CONF, PropertiesFile.dumps(bookkeeper_properties))

        execute_node_command(node, '{}/bin/pulsar-daemon start bookie'.format(PULSAR_HOME), quiet,
                             'BookKeeper start failed')
        execute_node_command(node, '{}/bin/bookkeeper shell bookiesanity'.format(PULSAR_HOME), quiet,
                             'BookKeeper sanity check failed')

    # brokers
    for node in broker_nodes:
        broker_conf = node.get_file(BROKER_CONF)
        broker_properties = PropertiesFile.loads(broker_conf)
        broker_properties.update({'zookeeperServers': zk_servers_conf,
                                  'configurationStoreServers': zk_servers_conf,
                                  'clusterName': args.pulsar_cluster_name})
        node.put_file(BROKER_CONF, PropertiesFile.dumps(broker_properties))

    # proxy
    proxy_conf = proxy_node.get_file(PROXY_CONF)
    proxy_properties = PropertiesFile.loads(proxy_conf)
    proxy_properties.update({'zookeeperServers': zk_servers_conf,
                             'configurationStoreServers': zk_servers_conf,
                             'httpNumThreads': '8'})
    proxy_node.put_file(PROXY_CONF, PropertiesFile.dumps(proxy_properties))

    # TLS
    execute_node_command(proxy_node, 'rm -rf {}'.format(TLS_DIR), quiet=quiet)
    if args.tls:
        setup_commands = [
            'mkdir -p {}'.format(TLS_CLIENT_DIR),
            'wget -P {} {}'.format(TLS_DIR, TLS_CONF_URL),
            'mkdir -p {dir}/certs {dir}/crl {dir}/newcerts {dir}/private'.format(dir=TLS_DIR),
            'chmod 700 {}/private'.format(TLS_DIR),
            'touch {}/index.txt'.format(TLS_DIR),
            'echo "unique_subject = no" > {}/index.txt.attr'.format(TLS_DIR),
            'echo 1000 > {}/serial'.format(TLS_DIR),
        ]
        execute_node_command(proxy_node, ' && '.join(setup_commands), quiet, 'TLS system setup failed')

        ca_auth_commands = [
            'export CA_HOME={}'.format(TLS_DIR),
            'openssl genrsa -out {dir}/private/ca.key.pem 4096'.format(dir=TLS_DIR),
            'chmod 400 {}/private/ca.key.pem'.format(TLS_DIR),
            ('openssl req -config {dir}/openssl.cnf -key {dir}/private/ca.key.pem'
             ' -new -x509 -days 7300 -sha256 -extensions v3_ca -out {dir}/certs/ca.cert.pem'
             ' -subj "/C=US/ST=California/L=Palo Alto/O=My company/CN=*"').format(dir=TLS_DIR),
            'chmod 444 {}/certs/ca.cert.pem'.format(TLS_DIR),
            'cp {}/certs/ca.cert.pem {}'.format(TLS_DIR, TLS_CLIENT_DIR)
        ]
        execute_node_command(proxy_node, ' && '.join(ca_auth_commands), quiet,
                             'Certificate authority creation failed')

        server_cert_commands = [
            'export CA_HOME={}'.format(TLS_DIR),
            'openssl genrsa -out {}/broker.key.pem 2048'.format(TLS_DIR),
            ('openssl pkcs8 -topk8 -inform PEM -outform PEM -in {dir}/broker.key.pem'
             ' -out {dir}/broker.key-pk8.pem -nocrypt').format(dir=TLS_DIR),
            # The common name (CN) needs to be *.<network> so that hosts on that network can access the Pulsar cluster.
            ('openssl req -config {dir}/openssl.cnf -key {dir}/broker.key.pem -new -sha256 -out {dir}/broker.csr.pem'
             ' -subj "/C=US/ST=California/L=Palo Alto/O=My company/CN=*.{nw}"').format(dir=TLS_DIR, nw=cluster.network),
            ('openssl ca -batch -config {dir}/openssl.cnf -extensions server_cert -days 1000 -notext -md sha256'
             ' -in {dir}/broker.csr.pem -out {dir}/broker.cert.pem').format(dir=TLS_DIR)
        ]
        execute_node_command(proxy_node, ' && '.join(server_cert_commands), quiet,
                             'Broker certificate creation failed')

        for node in broker_nodes:
            broker_conf = node.get_file(BROKER_CONF)
            broker_properties = PropertiesFile.loads(broker_conf)
            broker_properties.update({'brokerServicePortTls': '6651',
                                      'tlsEnabled': 'true',
                                      'tlsCertificateFilePath': '{}/broker.cert.pem'.format(TLS_DIR),
                                      'tlsKeyFilePath': '{}/broker.key-pk8.pem'.format(TLS_DIR),
                                      'tlsTrustCertsFilePath': '{}/certs/ca.cert.pem'.format(TLS_DIR),
                                      'webServicePortTls': '8443'})
            node.put_file(BROKER_CONF, PropertiesFile.dumps(broker_properties))

        proxy_conf = proxy_node.get_file(PROXY_CONF)
        proxy_properties = PropertiesFile.loads(proxy_conf)
        proxy_properties.update({'servicePortTls': '6651',
                                 'tlsEnabledInProxy': 'true',
                                 'tlsCertificateFilePath': '{}/broker.cert.pem'.format(TLS_DIR),
                                 'tlsKeyFilePath': '{}/broker.key-pk8.pem'.format(TLS_DIR),
                                 'tlsTrustCertsFilePath': '{}/certs/ca.cert.pem'.format(TLS_DIR),
                                 'tlsEnabledWithBroker': 'true',
                                 'brokerClientTrustCertsFilePath': '{}/certs/ca.cert.pem'.format(TLS_DIR),
                                 'webServicePortTls': '8443'})
        proxy_node.put_file(PROXY_CONF, PropertiesFile.dumps(proxy_properties))

        for node in nodes:
            client_conf = node.get_file(CLIENT_CONF)
            client_properties = PropertiesFile.loads(client_conf)
            client_properties.update({'webServiceUrl': web_service_url_tls,
                                      'brokerServiceUrl': broker_service_url_tls,
                                      'useTls': 'true',
                                      'tlsAllowInsecureConnection': 'false',
                                      'tlsTrustCertsFilePath': '{}/certs/ca.cert.pem'.format(TLS_DIR)})
            node.put_file(CLIENT_CONF, PropertiesFile.dumps(client_properties))

        # TLS auth
        if args.tls == 'authentication':
            client_cert_commands = [
                'export CA_HOME={}'.format(TLS_DIR),
                'openssl genrsa -out {}/admin.key.pem 2048'.format(TLS_DIR),
                ('openssl pkcs8 -topk8 -inform PEM -outform PEM -in {dir}/admin.key.pem'
                 ' -out {dir}/admin.key-pk8.pem -nocrypt').format(dir=TLS_DIR),
                # The common name (CN) needs to be 'admin', matching the client's user principal in Pulsar.
                ('openssl req -config {dir}/openssl.cnf -key {dir}/admin.key.pem -new -sha256 -out {dir}/admin.csr.pem'
                 ' -subj "/C=US/ST=California/L=Palo Alto/O=My company/CN=admin"').format(dir=TLS_DIR),
                ('openssl ca -batch -config {dir}/openssl.cnf -extensions usr_cert -days 1000 -notext -md sha256'
                 ' -in {dir}/admin.csr.pem -out {dir}/admin.cert.pem').format(dir=TLS_DIR),
                'mv {}/admin.* {}'.format(TLS_DIR, TLS_CLIENT_DIR)
            ]
            execute_node_command(proxy_node, ' && '.join(client_cert_commands), quiet,
                                 'Client certificate creation failed')

            proxy_cert_commands = [
                'export CA_HOME={}'.format(TLS_DIR),
                'openssl genrsa -out {}/proxy.key.pem 2048'.format(TLS_DIR),
                ('openssl pkcs8 -topk8 -inform PEM -outform PEM -in {dir}/proxy.key.pem'
                 ' -out {dir}/proxy.key-pk8.pem -nocrypt').format(dir=TLS_DIR),
                # The common name (CN) needs to be 'proxyadmin', matching the proxy principal in Pulsar.
                ('openssl req -config {dir}/openssl.cnf -key {dir}/proxy.key.pem -new -sha256 -out {dir}/proxy.csr.pem'
                 ' -subj "/C=US/ST=California/L=Palo Alto/O=My company/CN=proxyadmin"').format(dir=TLS_DIR),
                ('openssl ca -batch -config {dir}/openssl.cnf -extensions usr_cert -days 1000 -notext -md sha256'
                 ' -in {dir}/proxy.csr.pem -out {dir}/proxy.cert.pem').format(dir=TLS_DIR)
            ]
            execute_node_command(proxy_node, ' && '.join(proxy_cert_commands), quiet,
                                 'Proxy certificate creation failed')

            for node in broker_nodes:
                broker_conf = node.get_file(BROKER_CONF)
                broker_properties = PropertiesFile.loads(broker_conf)
                broker_properties.update({
                    'authenticationEnabled': 'true',
                    'authenticationProviders': 'org.apache.pulsar.broker.authentication.AuthenticationProviderTls',
                    'proxyRoles': 'proxyadmin',
                    'superUserRoles': 'proxyadmin,admin'})
                node.put_file(BROKER_CONF, PropertiesFile.dumps(broker_properties))

            proxy_conf = proxy_node.get_file(PROXY_CONF)
            proxy_properties = PropertiesFile.loads(proxy_conf)
            proxy_properties.update({
                'authenticationEnabled': 'true',
                'authenticationProviders': 'org.apache.pulsar.broker.authentication.AuthenticationProviderTls',
                'brokerClientAuthenticationPlugin': 'org.apache.pulsar.client.impl.auth.AuthenticationTls',
                'brokerClientAuthenticationParameters': ('tlsCertFile:{dir}/proxy.cert.pem,'
                                                         'tlsKeyFile:{dir}/proxy.key-pk8.pem').format(dir=TLS_DIR),
                'superUserRoles': 'admin'})
            proxy_node.put_file(PROXY_CONF, PropertiesFile.dumps(proxy_properties))

            for node in nodes:
                client_conf = node.get_file(CLIENT_CONF)
                client_properties = PropertiesFile.loads(client_conf)
                client_properties.update({'authPlugin': 'org.apache.pulsar.client.impl.auth.AuthenticationTls',
                                          'authParams': ('tlsCertFile:{dir}/admin.cert.pem,tlsKeyFile:'
                                                         '{dir}/admin.key-pk8.pem').format(dir=TLS_CLIENT_DIR)})
                node.put_file(CLIENT_CONF, PropertiesFile.dumps(client_properties))

    # start broker nodes and proxy node
    for node in broker_nodes:
        execute_node_command(node, '{}/bin/pulsar-daemon start broker'.format(PULSAR_HOME), quiet,
                             'Broker start failed')

    out_file = '{}/logs/pulsar-proxy-{}.{}.out'.format(PULSAR_HOME, proxy_node.hostname, cluster.network)
    execute_node_command(proxy_node, 'mkdir -p {}/logs'.format(PULSAR_HOME), quiet)
    execute_node_command(proxy_node,
                         'nohup {}/bin/pulsar proxy > "{}" 2>&1 < /dev/null &'.format(PULSAR_HOME, out_file),
                         quiet, 'Proxy start failed')

    logger.info('Performing health check on Pulsar cluster (%s) ...', args.pulsar_cluster_name)
    def condition(node, cluster_name, command):
        command_status = node.execute(command, quiet=True)
        return command_status.exit_code == 0 and command_status.output.splitlines()[-1].strip().strip('"') == cluster_name
    wait_for_condition(condition=condition, condition_args=[proxy_node, args.pulsar_cluster_name,
                                                            '{}/bin/pulsar-admin clusters list'.format(PULSAR_HOME)])

    logger.info('Pulsar cluster (%s) can be reached on docker network (%s):\n%s \n%s',
                args.pulsar_cluster_name, cluster.network,
                textwrap.indent('Web service URL: {}'.format(web_service_url), prefix='    '),
                textwrap.indent('Broker service URL: {}'.format(broker_service_url), prefix='    '))
    logger.log(logging.INFO if args.tls else -1,
               'Pulsar cluster (%s) can be reached securely on docker network (%s):\n%s \n%s',
               args.pulsar_cluster_name, cluster.network,
               textwrap.indent('Secure web service URL: {}'.format(web_service_url_tls), prefix='    '),
               textwrap.indent('Secure broker service URL: {}'.format(broker_service_url_tls), prefix='    '))
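execute_node_command is used throughout this example but defined elsewhere in the topology. A minimal sketch of such a helper, assuming node.execute returns an object with exit_code and output attributes (as the health-check condition above suggests); this is not the original implementation:

def execute_node_command(node, command, quiet, failure_message=None):
    # Run a command on the node and raise a descriptive error if it fails.
    command_status = node.execute(command, quiet=quiet)
    if command_status.exit_code != 0:
        raise Exception('{} (exit code {}): {}'.format(failure_message or 'Command failed',
                                                       command_status.exit_code,
                                                       command_status.output))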
Example #3
def main(args):
    kerberos_volume_dir = os.path.expanduser(args.kerberos_config_directory)

    image = '{}/{}/topology_nodebase:{}'.format(
        args.registry, args.namespace or DEFAULT_NAMESPACE,
        args.operating_system or DEFAULT_OPERATING_SYSTEM)
    nodes = [
        Node(hostname=hostname,
             group='nodes',
             image=image,
             volumes=[{
                 kerberos_volume_dir: KERBEROS_VOLUME_DIR
             }]) for hostname in args.nodes
    ]

    kdc_image = '{}/{}/topology_nodebase_kerberos:{}'.format(
        args.registry, args.namespace or DEFAULT_NAMESPACE,
        args.operating_system or DEFAULT_OPERATING_SYSTEM)
    kdc_hostname = args.kdc_node[0]
    kdc_node = Node(hostname=kdc_hostname,
                    group='kdc',
                    image=kdc_image,
                    volumes=[{
                        kerberos_volume_dir: KERBEROS_VOLUME_DIR
                    }])
    cluster = Cluster(kdc_node, *nodes)
    cluster.start(args.network)

    logger.info('Updating KDC configurations ...')
    realm = cluster.network.upper()
    krb5_conf_data = kdc_node.get_file(KDC_KRB5_CONF_FILENAME)
    kdc_node.put_file(
        KDC_KRB5_CONF_FILENAME,
        re.sub(
            r'EXAMPLE.COM', realm,
            re.sub(
                r'example.com', cluster.network,
                re.sub(r'kerberos.example.com',
                       r'{}.{}'.format(kdc_hostname,
                                       cluster.network), krb5_conf_data))))
    kdc_conf_data = kdc_node.get_file(KDC_CONF_FILENAME)
    kdc_node.put_file(
        KDC_CONF_FILENAME,
        re.sub(
            r'EXAMPLE.COM', realm,
            re.sub(r'\[kdcdefaults\]',
                   r'[kdcdefaults]\n max_renewablelife = 7d\n max_life = 1d',
                   kdc_conf_data)))
    acl_data = kdc_node.get_file(KDC_ACL_FILENAME)
    kdc_node.put_file(KDC_ACL_FILENAME, re.sub(r'EXAMPLE.COM', realm,
                                               acl_data))
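    # Net effect of the substitutions above: the stock EXAMPLE.COM realm, example.com domain and
    # kerberos.example.com KDC host in the copied configuration files are rewritten for this
    # cluster, e.g. a Docker network named 'cluster' yields realm 'CLUSTER' and KDC host
    # '<kdc_hostname>.cluster'.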

    logger.info('Starting KDC ...')
    kdc_commands = [
        'kdb5_util create -s -r {realm} -P kdcadmin'.format(realm=realm),
        'kadmin.local -q "addprinc -pw {admin_pw} admin/admin@{realm}"'.format(
            admin_pw='acladmin', realm=realm)
    ]

    # Add principal and keytab creation commands before the KDC daemons (krb5kdc, kadmind) are started.
    if args.kerberos_principals:
        principal_list = [
            '{}@{}'.format(principal, realm)
            for principal in args.kerberos_principals.split(',')
        ]
        create_principals_cmds = [
            'kadmin.local -q "addprinc -randkey {}"'.format(principal)
            for principal in principal_list
        ]
        kdc_commands.extend(create_principals_cmds)

        kdc_commands.append('rm -f {}'.format(KDC_KEYTAB_FILENAME))
        create_keytab_cmd = 'kadmin.local -q "xst -norandkey -k {} {}" '.format(
            KDC_KEYTAB_FILENAME, ' '.join(principal_list))
        kdc_commands.append(create_keytab_cmd)

    kdc_commands.extend(
        ['krb5kdc', 'kadmind', 'authconfig --enablekrb5 --update'])

    kdc_commands.append('cp -f {} {}'.format(KDC_KRB5_CONF_FILENAME,
                                             KERBEROS_VOLUME_DIR))
    if args.kerberos_principals:
        kdc_commands.append('chmod 644 {}'.format(KDC_KEYTAB_FILENAME))

    kdc_node.execute(command="bash -c '{}'".format('; '.join(kdc_commands)),
                     quiet=not args.verbose)

    logger.info('Validating service health ...')
    _validate_service_health(node=kdc_node,
                             services=['krb5kdc', 'kadmin'],
                             quiet=not args.verbose)
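_validate_service_health is referenced here (and again in the web-server example below) but not shown. A plausible sketch, assuming it simply checks each service's init-script status on the node; the real helper may differ:

def _validate_service_health(node, services, quiet=True):
    # Fail loudly if any of the expected services reports an unhealthy status.
    for service in services:
        command_status = node.execute('service {} status'.format(service), quiet=quiet)
        if command_status.exit_code != 0:
            raise Exception('Service {} is not healthy on node {}.'.format(service, node.hostname))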
Example #4
def main(args):
    quiet = not args.verbose
    print_topology_meta(args.topology)

    if args.include_services and args.exclude_services:
        raise ValueError(
            'Cannot pass both --include-services and --exclude-services.')

    image_prefix = '{}/{}/topology_hdp:hdp{}_ambari{}'.format(
        args.registry, args.namespace or DEFAULT_NAMESPACE, args.hdp_version,
        args.ambari_version)
    primary_node_image = '{}_{}'.format(image_prefix, 'primary-node')
    secondary_node_image = '{}_{}'.format(image_prefix, 'secondary-node')

    clusterdock_config_host_dir = os.path.realpath(
        os.path.expanduser(args.clusterdock_config_directory))
    volumes = [{clusterdock_config_host_dir: CLUSTERDOCK_CLIENT_CONTAINER_DIR}]

    primary_node = Node(hostname=args.primary_node[0],
                        group='primary',
                        volumes=volumes,
                        image=primary_node_image,
                        ports=[{
                            AMBARI_PORT: AMBARI_PORT
                        } if args.predictable else AMBARI_PORT])

    secondary_nodes = [
        Node(hostname=hostname,
             group='secondary',
             volumes=volumes,
             image=secondary_node_image) for hostname in args.secondary_nodes
    ]

    cluster = Cluster(primary_node, *secondary_nodes)
    cluster.primary_node = primary_node
    cluster.secondary_nodes = secondary_nodes

    for node in cluster.nodes:
        node.volumes.append({'/sys/fs/cgroup': '/sys/fs/cgroup'})
        # Do not use tempfile.mkdtemp here: if the temporary directory ends up under
        # /var/tmp/, systemd won't be able to bring the services up.
        node.volumes.append(['/run', '/run/lock'])

    cluster.start(args.network)

    hdp_version_tuple = version_tuple(args.hdp_version)

    logger.debug('Starting PostgreSQL for Ambari server ...')

    # This is needed because the init system inside Docker initially misreports the PostgreSQL status.
    # See https://github.com/docker-library/postgres/issues/146 for details.
    def condition():
        primary_node.execute('service postgresql restart', quiet=quiet)
        if '1 row' in primary_node.execute(
                'PGPASSWORD=bigdata psql ambari '
                '-U ambari -h localhost -c "select 1"',
                quiet=quiet).output:
            return True

    wait_for_condition(condition=condition, time_between_checks=2)

    def condition():
        if 'running' in primary_node.execute('service postgresql status',
                                             quiet=quiet).output:
            return True

    wait_for_condition(condition=condition)

    # If the images are set to start the Ambari server/agents, give them some time to reach the right status.
    time.sleep(10)
    _update_node_names(cluster, quiet=quiet)

    # The HDP topology uses two pre-built images ('primary' and 'secondary'). If a cluster
    # larger than 2 nodes is started, some modifications need to be done.
    if len(secondary_nodes) > 1:
        _remove_files(nodes=secondary_nodes[1:],
                      files=['/hadoop/hdfs/data/current/*'],
                      quiet=quiet)

    logger.info('Starting Ambari server ...')
    primary_node.execute('ambari-server start', quiet=quiet)

    # Docker for Mac exposes ports that can be accessed only with ``localhost:<port>`` so
    # use that instead of the hostname if the host name is ``moby``.
    hostname = ('localhost'
                if client.info().get('Name') == 'moby' else socket.getaddrinfo(
                    socket.gethostname(), 0, flags=socket.AI_CANONNAME)[0][3])
    port = cluster.primary_node.host_ports.get(AMBARI_PORT)
    server_url = 'http://{}:{}'.format(hostname, port)
    logger.info('Ambari server is now reachable at %s', server_url)

    logger.info('Starting Ambari agents ...')
    for node in cluster:
        logger.debug('Starting Ambari agent on %s ...', node.fqdn)
        node.execute('ambari-agent start', quiet=quiet)

    ambari = Ambari(server_url, username='******', password='******')

    def condition(ambari, cluster):
        cluster_hosts = {node.fqdn for node in cluster}
        ambari_hosts = {host.host_name for host in ambari.hosts}
        logger.debug('Cluster hosts: %s; Ambari hosts: %s', cluster_hosts,
                     ambari_hosts)
        return cluster_hosts == ambari_hosts

    wait_for_condition(condition=condition, condition_args=[ambari, cluster])

    service_types_to_leave = (args.include_services.upper().split(',')
                              if args.include_services else [])
    service_types_to_remove = (args.exclude_services.upper().split(',')
                               if args.exclude_services else [])
    if service_types_to_leave or service_types_to_remove:
        for service in list(ambari.clusters(DEFAULT_CLUSTER_NAME).services):
            service_name = service.service_name.upper()
            if (service_name in service_types_to_remove
                    or (service_types_to_leave
                        and service_name not in service_types_to_leave)):
                logger.info('Removing cluster service (name = %s) ...',
                            service_name)
                service.delete()

    for node in secondary_nodes[1:]:
        logger.info('Adding %s to cluster ...', node.fqdn)
        ambari.clusters(DEFAULT_CLUSTER_NAME).hosts.create(node.fqdn)
        secondary_node = ambari.clusters(DEFAULT_CLUSTER_NAME).hosts(
            secondary_nodes[0].fqdn)
        for component in secondary_node.components:
            logger.debug('Adding component (%s) to cluster on host (%s) ...',
                         component.component_name, node.fqdn)
            host_components = ambari.clusters(DEFAULT_CLUSTER_NAME).hosts(
                node.fqdn).components
            host_components.create(component.component_name).wait()

        logger.debug('Installing all registered components on host (%s) ...',
                     node.fqdn)
        ambari.clusters(DEFAULT_CLUSTER_NAME).hosts(
            node.fqdn).components.install().wait()

    logger.info('Waiting for all hosts to reach healthy state ...')

    def condition(ambari):
        health_report = ambari.clusters(DEFAULT_CLUSTER_NAME).health_report
        logger.debug('Ambari cluster health report: %s ...', health_report)
        return health_report.get('Host/host_state/HEALTHY') == len(
            list(ambari.hosts))

    wait_for_condition(condition=condition, condition_args=[ambari])

    service_names = [
        service['service_name'] for service in ambari.clusters(
            DEFAULT_CLUSTER_NAME).services.to_dict()
    ]

    if 'ATLAS' in service_names:
        logger.info('Configuring Atlas required properties ...')
        _configure_atlas(ambari,
                         args.hdp_version,
                         atlas_server_host=cluster.primary_node.fqdn)

    if 'HIVE' in service_names:
        primary_node.execute('touch /etc/hive/sys.db.created', quiet=quiet)

    logger.info('Waiting for components to be ready ...')

    def condition(ambari):
        comps = ambari.clusters(
            DEFAULT_CLUSTER_NAME).cluster.host_components.refresh()
        for comp in comps:
            if comp.state.upper() == 'UNKNOWN':
                logger.debug('Not ready with component `%s` ...',
                             comp.component_name)
                return False
        else:
            return True

    wait_for_condition(condition=condition, condition_args=[ambari])

    if not args.dont_start_cluster:
        logger.info('Starting cluster services ...')
        ambari.clusters(DEFAULT_CLUSTER_NAME).services.start().wait(
            timeout=3600)

        if 'HBASE' in service_names:
            logger.info('Starting Thrift server ...')
            if hdp_version_tuple <= (2, 0, 13, 0):
                hbase_daemon_path = '/usr/lib/hbase/bin/hbase-daemon.sh'
            else:
                hbase_daemon_path = '/usr/hdp/current/hbase-master/bin/hbase-daemon.sh'
            primary_node.execute('{} start thrift -p {} '
                                 '--infoport {}'.format(
                                     hbase_daemon_path,
                                     HBASE_THRIFT_SERVER_PORT,
                                     HBASE_THRIFT_SERVER_INFO_PORT),
                                 quiet=quiet)
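Two helpers used above are defined elsewhere. version_tuple most likely just splits the version string into integers (the MapR example further down does exactly that inline), and _remove_files plausibly removes the given paths on each node; both sketches below are assumptions rather than the original code:

def version_tuple(version):
    # e.g. '2.6.5.0' -> (2, 6, 5, 0), so versions can be compared as tuples.
    return tuple(int(part) for part in version.split('.'))


def _remove_files(nodes, files, quiet=True):
    # Remove the given file globs on each node; a shell is needed for glob expansion.
    for node in nodes:
        node.execute("bash -c 'rm -rf {}'".format(' '.join(files)), quiet=quiet)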
Example #5
def main(args):
    kerberos_volume_dir = os.path.expanduser(
        args.kerberos_config_directory or args.clusterdock_config_directory)

    # Kerberos node.
    kdc_image = '{}/clusterdock/topology_nodebase_kerberos:centos6.8'.format(
        args.registry)
    kdc_hostname = args.kdc_node[0]
    kdc_node = Node(hostname=kdc_hostname,
                    group='kdc',
                    image=kdc_image,
                    volumes=[{
                        kerberos_volume_dir: KERBEROS_VOLUME_DIR
                    }])

    # Webserver node. This is the reverse proxy that exposes the URLs.
    webserver_image = '{}/{}/topology_http_kerberos:webserver'.format(
        args.registry, args.namespace or DEFAULT_NAMESPACE)
    webserver_hostname = args.webserver_node[0]
    webserver_node = Node(hostname=webserver_hostname,
                          group='webserver',
                          image=webserver_image,
                          volumes=[{
                              kerberos_volume_dir: KERBEROS_VOLUME_DIR
                          }],
                          ports={
                              80: 80,
                              443: 443
                          })

    # Service node. The actual service (in our case, Pretenders, which lets us create mock HTTP URLs).
    service_hostname = args.service_node[0]
    service_node = Node(hostname=service_hostname,
                        group='service',
                        image='pretenders/pretenders:1.4',
                        ports={8000: 8000})

    cluster = Cluster(kdc_node, webserver_node, service_node)
    cluster.start(args.network)

    logger.info('Updating KDC configurations ...')
    realm = cluster.network.upper()
    krb5_conf_data = kdc_node.get_file(KDC_KRB5_CONF_FILENAME)
    kdc_node.put_file(
        KDC_KRB5_CONF_FILENAME,
        re.sub(
            r'EXAMPLE.COM', realm,
            re.sub(
                r'example.com', cluster.network,
                re.sub(r'kerberos.example.com',
                       r'{}.{}'.format(kdc_hostname,
                                       cluster.network), krb5_conf_data))))
    kdc_conf_data = kdc_node.get_file(KDC_CONF_FILENAME)
    kdc_node.put_file(
        KDC_CONF_FILENAME,
        re.sub(
            r'EXAMPLE.COM', realm,
            re.sub(r'\[kdcdefaults\]',
                   r'[kdcdefaults]\n max_renewablelife = 7d\n max_life = 1d',
                   kdc_conf_data)))
    acl_data = kdc_node.get_file(KDC_ACL_FILENAME)
    kdc_node.put_file(KDC_ACL_FILENAME, re.sub(r'EXAMPLE.COM', realm,
                                               acl_data))

    logger.info('Starting KDC ...')
    kdc_commands = [
        'kdb5_util create -s -r {realm} -P kdcadmin'.format(realm=realm),
        'kadmin.local -q "addprinc -pw {admin_pw} admin/admin@{realm}"'.format(
            admin_pw='acladmin', realm=realm)
    ]

    # Add three principals: two for the HTTP services and one for a client (browser).
    principals = [{
        'principal':
        'HTTP/webserver.{}@{}'.format(cluster.network, realm),
        'keytab':
        SERVICE_KEYTAB_FILENAME
    }, {
        'principal':
        'HTTP/sdcwebserver.{}@{}'.format(cluster.network, realm),
        'keytab':
        '/etc/clusterdock/kerberos/sdcwebserver.keytab'
    }, {
        'principal': 'browser@{0}'.format(realm),
        'keytab': CLIENT_KEYTAB_FILENAME
    }]

    create_principals_cmds = [
        'kadmin.local -q "addprinc -randkey {}"'.format(principal['principal'])
        for principal in principals
    ]
    kdc_commands.extend(create_principals_cmds)

    # Delete any existing keytab files.
    kdc_commands.append('rm -f {}/*.keytab'.format(KERBEROS_VOLUME_DIR))

    create_keytab_cmds = [
        'kadmin.local -q "xst -norandkey -k {} {}"'.format(
            principal['keytab'], principal['principal'])
        for principal in principals
    ]
    kdc_commands.extend(create_keytab_cmds)

    kdc_commands.extend(
        ['krb5kdc', 'kadmind', 'authconfig --enablekrb5 --update'])

    kdc_commands.append('cp -f {} {}'.format(KDC_KRB5_CONF_FILENAME,
                                             KERBEROS_VOLUME_DIR))
    kdc_commands.extend([
        'chmod 644 {}'.format(principal['keytab']) for principal in principals
    ])

    kdc_node.execute(command="bash -c '{}'".format('; '.join(kdc_commands)),
                     quiet=not args.verbose)

    logger.info('Validating kerberos service health ...')
    _validate_service_health(node=kdc_node,
                             services=['krb5kdc', 'kadmin'],
                             quiet=not args.verbose)

    # Copy the self-signed certificate and private key from the image to the clusterdock config location.
    # Any consumer can then import the certificate as a trusted certificate.
    webserver_node.execute(
        'cp /etc/ssl/certs/selfsigned.crt {ssl_cert_dir}/selfsigned.crt '
        '&& cp /etc/ssl/private/private.key {ssl_cert_dir}/private.key'.format(
            ssl_cert_dir=KERBEROS_VOLUME_DIR),
        quiet=not args.verbose)

    # Copy the krb5.conf file from the shared location to /etc on the webserver node and start the webserver.
    webserver_node.execute('cp -p {}/krb5.conf {}'.format(
        KERBEROS_VOLUME_DIR, KDC_KRB5_CONF_FILENAME),
                           quiet=not args.verbose)
    webserver_node.execute('service httpd start', quiet=not args.verbose)

    logger.info('Validating web server health ...')
    _validate_service_health(node=webserver_node,
                             services=['httpd'],
                             quiet=not args.verbose)
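The following is not part of the original example: once the KDC and web server are healthy, a client container on the same network (the client_node below is hypothetical) could exercise the Kerberized endpoint roughly like this, reusing the 'browser' principal, its keytab and the self-signed certificate copied above:

def kerberized_client_check(client_node, realm, webserver_fqdn, quiet=True):
    # Obtain a ticket for the 'browser' principal, then request the proxied URL over TLS
    # with SPNEGO; assumes the client node mounts the same shared Kerberos directory.
    commands = [
        'kinit -kt {} browser@{}'.format(CLIENT_KEYTAB_FILENAME, realm),
        'curl --negotiate -u : --cacert {}/selfsigned.crt https://{}/'.format(KERBEROS_VOLUME_DIR,
                                                                              webserver_fqdn)
    ]
    client_node.execute("bash -c '{}'".format(' && '.join(commands)), quiet=quiet)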
Example #6
def main(args):
    quiet = not args.verbose

    # Image name
    image = '{}/{}/topology_apache_kafka:kafka-{}-{}'.format(
        args.registry, args.namespace or DEFAULT_NAMESPACE, args.kafka_version,
        args.scala_version)

    # Nodes in the Kafka cluster
    nodes = [
        Node(hostname=hostname,
             group='brokers',
             ports=[ZOOKEEPER_PORT, BROKER_PORT],
             image=image) for hostname in args.brokers
    ]

    cluster = Cluster(*nodes)
    cluster.start(args.network, pull_images=args.always_pull)

    # Create distributed zookeeper configuration
    zookeeper_config = ('tickTime=2000\n'
                        'dataDir=/zookeeper\n'
                        'clientPort=2181\n'
                        'initLimit=5\n'
                        'syncLimit=2\n')
    for idx, node in enumerate(cluster):
        zookeeper_config += 'server.{}={}:2888:3888\n'.format(
            idx, node.hostname)

    # Start all zookeepers
    for idx, node in enumerate(cluster):
        logger.info('Starting Zookeeper on node {}'.format(node.hostname))
        node.execute('mkdir -p /zookeeper')
        node.put_file('/zookeeper/myid', str(idx))
        node.put_file('/zookeeper.properties', zookeeper_config)
        node.execute('/start_zookeeper &', detach=True)

    # Validate that ZooKeeper is alive on each node
    for node in cluster:
        logger.info('Validating Zookeeper on node %s', node.hostname)
        wait_for_condition(condition=validate_zookeeper,
                           condition_args=[node, quiet],
                           time_between_checks=3,
                           timeout=60,
                           success=success,
                           failure=failure)

    # Start all brokers
    for idx, node in enumerate(cluster):
        logger.info('Starting Kafka on node {}'.format(node.hostname))

        kafka_config = node.get_file('/kafka/config/server.properties')
        kafka_config = kafka_config.replace('broker.id=0',
                                            'broker.id={}'.format(idx))
        node.put_file('/kafka.properties', kafka_config)

        node.execute('/start_kafka &', detach=True)

    # Verify that all Kafka brokers are up
    logger.info('Waiting for all brokers to register in ZooKeeper')
    wait_for_condition(condition=validate_kafka,
                       condition_args=[nodes[0], len(nodes), quiet],
                       time_between_checks=3,
                       timeout=60,
                       success=success,
                       failure=failure)

    # Automatically create topics
    for topic in args.topics.split(','):
        logger.info('Creating topic %s', topic)
        nodes[0].execute('/create_topic {}'.format(topic), quiet=quiet)
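validate_zookeeper and validate_kafka are condition callables defined elsewhere. Plausible sketches follow, assuming ZooKeeper's 'ruok' four-letter command is reachable via nc and that brokers register under /brokers/ids; the paths and shell details are assumptions, not the original code:

def validate_zookeeper(node, quiet):
    # A serving ZooKeeper answers 'imok' to the 'ruok' four-letter command.
    result = node.execute("bash -c 'echo ruok | nc localhost 2181'", quiet=quiet)
    return result.exit_code == 0 and 'imok' in result.output


def validate_kafka(node, expected_brokers, quiet):
    # Each broker registers an ephemeral znode under /brokers/ids; the last line of the
    # ZooKeeper shell output looks like '[0, 1, 2]'.
    result = node.execute('/kafka/bin/zookeeper-shell.sh localhost:2181 ls /brokers/ids', quiet=quiet)
    if result.exit_code != 0:
        return False
    ids_line = result.output.splitlines()[-1].strip()
    broker_ids = [i for i in ids_line.strip('[]').split(',') if i.strip()]
    return len(broker_ids) == expected_brokers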
Example #7
def main(args):
    primary_node_image = "{0}/{1}/{2}:cdh-cm-primary-{3}".format(
        args.registry, args.clusterdock_namespace, args.image_name,
        args.version_string)

    secondary_node_image = "{0}/{1}/{2}:cdh-cm-secondary-{3}".format(
        args.registry, args.clusterdock_namespace, args.image_name,
        args.version_string)

    edge_node_image = "{0}/{1}/{2}:cdh-cm-edge-{3}".format(
        args.registry, args.clusterdock_namespace, args.image_name,
        args.version_string)

    # Docker's API for healthcheck uses units of nanoseconds. Define a constant
    # to make this more readable.
    SECONDS = 1000000000
    cm_server_healthcheck = {
        'test':
        'curl --silent --output /dev/null 127.0.0.1:{}'.format(CM_PORT),
        'interval': 1 * SECONDS,
        'timeout': 1 * SECONDS,
        'retries': 1,
        'start_period': 30 * SECONDS
    }
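    # Note: 'start_period' in container healthchecks is only honored by reasonably recent
    # Docker Engine/API versions (it was added around Docker 17.05); older daemons reject it.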
    primary_node = Node(hostname=args.primary_node[0],
                        group='primary',
                        image=primary_node_image,
                        ports=[{
                            CM_PORT: CM_PORT
                        }],
                        healthcheck=cm_server_healthcheck)
    secondary_nodes = [
        Node(hostname=hostname, group='secondary', image=secondary_node_image)
        for hostname in args.secondary_nodes
    ]

    edge_nodes = [
        Node(hostname=hostname, group='edge', image=edge_node_image)
        for hostname in args.edge_nodes
    ]

    all_nodes = [primary_node] + secondary_nodes + edge_nodes

    cluster = Cluster(*all_nodes)

    cluster.primary_node = primary_node

    secondary_node_group = NodeGroup(secondary_nodes)
    edge_node_group = NodeGroup(edge_nodes)

    cluster.start(args.network)

    filesystem_fix_commands = [
        'cp {0} {0}.1; umount {0}; mv -f {0}.1 {0}'.format(file_) for file_ in
        ['/etc/hosts', '/etc/resolv.conf', '/etc/hostname', '/etc/localtime']
    ]
    cluster.execute("bash -c '{}'".format('; '.join(filesystem_fix_commands)))

    # Use bsdtar instead of GNU tar because it works better with Docker.
    cluster.execute("ln -fs /usr/bin/bsdtar /bin/tar")

    _configure_cm_agents(cluster)

    if args.change_hostfile:
        update_hosts_file(cluster)

    # The CDH topology uses two pre-built images ('primary' and 'secondary'). If a cluster
    # larger than 2 nodes is started, some modifications need to be made to the extra nodes
    # to prevent duplicate heartbeats and similar conflicts.
    if len(secondary_nodes) > 1:
        _remove_files(
            nodes=secondary_nodes[1:],
            files=['/var/lib/cloudera-scm-agent/uuid', '/dfs*/dn/current/*'])

    logger.info('Configuring Kerberos...')

    cluster.primary_node.execute('/root/configure-kerberos.sh', quiet=True)
    cluster.primary_node.execute('service krb5kdc start', quiet=True)
    cluster.primary_node.execute('service kadmin start', quiet=True)

    logger.info('Restarting Cloudera Manager agents ...')
    # _restart_cm_agents(cluster)

    logger.info('Waiting for Cloudera Manager server to come online ...')
    _wait_for_cm_server(primary_node)

    # Docker for Mac exposes ports that can be accessed only with ``localhost:<port>`` so
    # use that instead of the hostname if the host name is ``moby``.
    hostname = 'localhost' if client.info().get(
        'Name') == 'moby' else socket.gethostname()
    port = primary_node.host_ports.get(CM_PORT)
    server_url = 'http://{}:{}'.format(hostname, port)
    logger.info('Cloudera Manager server is now reachable at %s', server_url)

    # The work we need to do through CM itself begins here...
    deployment = ClouderaManagerDeployment(server_url)

    deployment.stop_cm_service()
    time.sleep(10)

    logger.info('Starting krb5kdc and kadmin ...')
    cluster.primary_node.execute('service krb5kdc start', quiet=True)
    cluster.primary_node.execute('service kadmin start', quiet=True)

    logger.info("Regenerating keytabs...")
    regenerate_keytabs(cluster, primary_node, deployment)

    logger.info("Adding hosts to cluster ...")
    # Add all CM hosts to the cluster (i.e. only new hosts that weren't part of the original
    # images).
    all_host_ids = {}
    for host in deployment.get_all_hosts():
        all_host_ids[host['hostId']] = host['hostname']
        for node in cluster:
            if node.fqdn == host['hostname']:
                node.host_id = host['hostId']
                break
        else:
            raise Exception('Could not find CM host with hostname {}.'.format(
                node.fqdn))
    cluster_host_ids = {
        host['hostId']
        for host in deployment.get_cluster_hosts(
            cluster_name=DEFAULT_CLUSTER_NAME)
    }
    host_ids_to_add = set(all_host_ids.keys()) - cluster_host_ids

    if host_ids_to_add:
        logger.debug(
            'Adding %s to cluster %s ...', 'host{} ({})'.format(
                's' if len(host_ids_to_add) > 1 else '',
                ', '.join(all_host_ids[host_id]
                          for host_id in host_ids_to_add)),
            DEFAULT_CLUSTER_NAME)
        deployment.add_cluster_hosts(cluster_name=DEFAULT_CLUSTER_NAME,
                                     host_ids=host_ids_to_add)

    _wait_for_activated_cdh_parcel(deployment=deployment,
                                   cluster_name=DEFAULT_CLUSTER_NAME)

    # Create and apply host templates.
    deployment.create_host_template(cluster_name='cluster',
                                    host_template_name='secondary',
                                    role_config_group_names=[
                                        'hdfs-DATANODE-BASE',
                                        'hbase-REGIONSERVER-BASE',
                                        'yarn-NODEMANAGER-BASE'
                                    ])
    deployment.create_host_template(cluster_name='cluster',
                                    host_template_name='edgenode',
                                    role_config_group_names=[
                                        'hive-GATEWAY-BASE',
                                        'hbase-GATEWAY-BASE',
                                        'hdfs-GATEWAY-BASE',
                                        'spark_on_yarn-GATEWAY-BASE'
                                    ])

    deployment.apply_host_template(cluster_name=DEFAULT_CLUSTER_NAME,
                                   host_template_name='secondary',
                                   start_roles=False,
                                   host_ids=host_ids_to_add)

    deployment.apply_host_template(cluster_name=DEFAULT_CLUSTER_NAME,
                                   host_template_name='edgenode',
                                   start_roles=False,
                                   host_ids=host_ids_to_add)

    logger.info('Updating database configurations ...')
    _update_database_configs(deployment=deployment,
                             cluster_name=DEFAULT_CLUSTER_NAME,
                             primary_node=primary_node)

    # deployment.update_database_configs()
    # deployment.update_hive_metastore_namenodes()

    logger.info("Update KDC Config  ")
    deployment.update_cm_config({
        'SECURITY_REALM': 'CLOUDERA',
        'KDC_HOST': 'node-1.cluster',
        'KRB_MANAGE_KRB5_CONF': 'true'
    })

    deployment.update_service_config(
        service_name='hbase',
        cluster_name=DEFAULT_CLUSTER_NAME,
        configs={'hbase_superuser': '******'})

    deployment.update_service_role_config_group_config(
        service_name='hive',
        cluster_name=DEFAULT_CLUSTER_NAME,
        role_config_group_name='hive-HIVESERVER2-BASE',
        configs={'hiveserver2_webui_port': '10009'})

    logger.info("Importing Credentials..")

    cluster.primary_node.execute(
        "curl -XPOST -u admin:admin http://{0}:{1}/api/v14/cm/commands/importAdminCredentials?username=cloudera-scm/admin@CLOUDERA&password=cloudera"
        .format(primary_node.fqdn, CM_PORT),
        quiet=True)
    logger.info("deploy cluster client config ...")
    deployment.deploy_cluster_client_config(cluster_name=DEFAULT_CLUSTER_NAME)

    logger.info("Configure for kerberos ...")
    cluster.primary_node.execute(
        "curl -XPOST -u admin:admin http://{0}:{1}/api/v14/cm/commands/configureForKerberos --data 'clustername={2}'"
        .format(primary_node.fqdn, CM_PORT, DEFAULT_CLUSTER_NAME),
        quiet=True)

    logger.info("Creating keytab files ...")
    cluster.execute('/root/create-keytab.sh', quiet=True)

    logger.info('Deploying client config ...')
    _deploy_client_config(deployment=deployment,
                          cluster_name=DEFAULT_CLUSTER_NAME)

    if not args.dont_start_cluster:
        logger.info('Starting cluster services ...')
        _start_service_command(deployment=deployment,
                               cluster_name=DEFAULT_CLUSTER_NAME,
                               service_name="zookeeper",
                               command="start")
        _start_service_command(deployment=deployment,
                               cluster_name=DEFAULT_CLUSTER_NAME,
                               service_name="hdfs",
                               command="start")
        if not args.skip_accumulo:
            _start_service_command(deployment=deployment,
                                   cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="accumulo16",
                                   command="CreateHdfsDirCommand")
            _start_service_command(deployment=deployment,
                                   cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="accumulo16",
                                   command="CreateAccumuloUserDirCommand")
            _start_service_command(deployment=deployment,
                                   cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="accumulo16",
                                   command="AccumuloInitServiceCommand")
            _start_service_command(deployment=deployment,
                                   cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="accumulo16",
                                   command="start")
        if not args.skip_yarn:
            _start_service_command(deployment=deployment,
                                   cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="yarn",
                                   command="start")
        if not args.skip_hbase:
            _start_service_command(deployment=deployment,
                                   cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="hbase",
                                   command="start")
        if not args.skip_flume:
            _start_service_command(deployment=deployment,
                                   cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="flume",
                                   command="start")
        if not args.skip_spark:
            _start_service_command(deployment=deployment,
                                   cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="spark_on_yarn",
                                   command="start")
        if not args.skip_sqoop:
            _start_service_command(deployment=deployment,
                                   cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="sqoop",
                                   command="start")
        if not args.skip_hive:
            _start_service_command(deployment=deployment,
                                   cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="hive",
                                   command="start")
        if not args.skip_oozie:
            _start_service_command(deployment=deployment,
                                   cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="oozie",
                                   command="start")
        if not args.skip_hue:
            _start_service_command(deployment=deployment,
                                   cluster_name=DEFAULT_CLUSTER_NAME,
                                   service_name="hue",
                                   command="start")

        logger.info('Starting CM services ...')
        _start_cm_service(deployment=deployment)

    logger.info("Setting up HDFS Homedir ...")

    cluster.primary_node.execute(
        "kinit -kt /var/run/cloudera-scm-agent/process/*-hdfs-NAMENODE/hdfs.keytab hdfs/node-1.cluster@CLOUDERA",
        quiet=True)
    cluster.primary_node.execute("hadoop fs -mkdir /user/cloudera-scm",
                                 quiet=True)
    cluster.primary_node.execute(
        "hadoop fs -chown cloudera-scm:cloudera-scm /user/cloudera-scm",
        quiet=True)

    logger.info("Kinit cloudera-scm/admin ...")
    cluster.execute('kinit -kt /root/cloudera-scm.keytab cloudera-scm/admin',
                    quiet=True)

    logger.info("Executing post run script ...")
    secondary_node_group.execute("/root/post_run.sh")
    edge_node_group.execute("/root/post_run.sh")
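_wait_for_cm_server is not shown above. A minimal sketch, assuming it polls the Cloudera Manager port with the clusterdock wait_for_condition utility used by the other examples, mirroring both the container healthcheck defined earlier and the MapR Control System wait in the last example:

def _wait_for_cm_server(primary_node):
    # Consider CM up once curl gets any HTTP response from the CM port inside the container.
    def condition(node):
        command = 'curl --silent --output /dev/null 127.0.0.1:{}'.format(CM_PORT)
        return node.execute(command, quiet=True).exit_code == 0

    wait_for_condition(condition=condition, condition_args=[primary_node],
                       time_between_checks=5, timeout=600)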
Example #8
def main(args):
    node_image = '{}/{}/clusterdock:greenplum{}'.format(
        args.registry, args.namespace or DEFAULT_NAMESPACE,
        args.greenplum_version)
    volumes = [{'/sys/fs/cgroup': '/sys/fs/cgroup'}, {'/run': '/run/lock'}]

    if args.predictable:
        ports = [{
            GREENPLUM_SQL_CLIENT_CONNECTION_PORT:
            GREENPLUM_SQL_CLIENT_CONNECTION_PORT
        }, {
            GREENPLUM_SSH_HOST_PORT: GREENPLUM_SSH_CONTAINER_PORT
        }, {
            GREENPLUM_GPSS_LISTENER_PORT: GREENPLUM_GPSS_LISTENER_PORT
        }, {
            GREENPLUM_GPFDIST_SERVICE_PORT: GREENPLUM_GPFDIST_SERVICE_PORT
        }]
    else:
        ports = [
            GREENPLUM_SQL_CLIENT_CONNECTION_PORT, GREENPLUM_SSH_CONTAINER_PORT,
            GREENPLUM_GPSS_LISTENER_PORT, GREENPLUM_GPFDIST_SERVICE_PORT
        ]
    primary_node = Node(hostname=args.primary_node[0],
                        group='primary',
                        image=node_image,
                        name='greenplum_{}'.format(args.greenplum_version),
                        ports=ports,
                        volumes=volumes)

    secondary_nodes = [
        Node(hostname=hostname,
             group='secondary',
             image=node_image,
             volumes=volumes) for hostname in args.secondary_nodes
    ]

    nodes = [primary_node] + secondary_nodes
    cluster = Cluster(*nodes)
    cluster.primary_node = primary_node
    cluster.start(args.network, pull_images=args.always_pull)

    primary_node.put_file(HOST_FILE_PATH, '\n'.join(args.secondary_nodes))
    primary_node.put_file(CONFIG_JSON_FILE_PATH, CONFIG_JSON)

    commands = [
        'source /usr/local/greenplum-db/greenplum_path.sh',
        'chmod 755 /home/gpadmin/prepare.sh',
        # Create segment hosts with 1 primary segment in each segment host.
        '/home/gpadmin/prepare.sh -s {} -n 1'.format(len(args.secondary_nodes)),
        # Initialize the Greenplum Database system using the gpinitsystem_config file.
        'gpinitsystem -a -c /home/gpadmin/gpinitsystem_config'
    ]
    primary_node.execute(' && '.join(commands), user='******')

    commands = [
        'source /usr/local/greenplum-db/greenplum_path.sh',
        # To allow access to Greenplum Database from every host, update the pg_hba.conf file.
        "echo 'host all all 0.0.0.0/0 trust' >> /home/gpadmin/master/gpseg-1/pg_hba.conf",
        'export MASTER_DATA_DIRECTORY=/home/gpadmin/master/gpseg-1',
        # The following makes sure the changes to pg_hba.conf take effect.
        '/usr/local/greenplum-db/bin/gpstop -u',
        'sudo ln -s /usr/local/greenplum-db-5.12.0/lib/libpq.so.5 /usr/lib64/libpq.so.5',
        'sudo ln -s /usr/local/greenplum-db-5.12.0/lib/libssl.so.1.0.0 /usr/lib64/libssl.so.1.0.0',
        'sudo ln -s /usr/local/greenplum-db-5.12.0/lib/libcrypto.so.1.0.0 /usr/lib64/libcrypto.so.1.0.0',
        'sudo ln -s /usr/local/greenplum-db-5.12.0/lib/libcom_err.so.3 /usr/lib64/libcom_err.so.3',
        # Create db and extension in it.
        '/usr/local/greenplum-db/bin/createdb some_db',
        "/usr/local/greenplum-db/bin/psql -d some_db -c 'CREATE EXTENSION  gpss;'"
    ]
    primary_node.execute(' && '.join(commands), user='******')

    # Start Greenplum Stream Server in detached mode since it waits indefinitely for client job requests.
    primary_node.execute(
        '/usr/local/greenplum-db/bin/gpss /home/gpadmin/config.json',
        user='******',
        detach=True)
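The following is not part of the original example: since gpss is started detached and waits indefinitely for client jobs, a caller may want to confirm it is listening before submitting anything. A sketch using the same socket/wait_for_condition pattern as the MapR example below; the check itself is an assumption:

from socket import socket

def wait_for_gpss(primary_node):
    # Block until the Greenplum Stream Server accepts TCP connections on its listener port.
    def condition(address, port):
        return socket().connect_ex((address, port)) == 0

    wait_for_condition(condition=condition,
                       condition_args=[primary_node.ip_address, GREENPLUM_GPSS_LISTENER_PORT],
                       time_between_checks=3,
                       timeout=120)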
Example #9
def main(args):
    if args.license_url and not args.license_credentials:
        raise Exception(
            '--license-credentials is a required argument if --license-url is provided.'
        )

    image_prefix = '{}/{}/clusterdock:mapr{}'.format(
        args.registry, args.namespace or DEFAULT_NAMESPACE, args.mapr_version)
    if args.mep_version:
        image_prefix = '{}_mep{}'.format(image_prefix, args.mep_version)
    primary_node_image = '{}_{}'.format(image_prefix, 'primary-node')
    secondary_node_image = '{}_{}'.format(image_prefix, 'secondary-node')

    node_disks = yaml.load(args.node_disks)

    # MapR-FS needs each fileserver node to have a disk allocated for it, so fail fast if the
    # node disks map is missing any nodes.
    if set(args.primary_node + args.secondary_nodes) != set(node_disks):
        raise Exception(
            'Not all nodes are accounted for in the --node-disks dictionary')

    primary_node = Node(
        hostname=args.primary_node[0],
        group='primary',
        image=primary_node_image,
        ports=[{
            MCS_SERVER_PORT: MCS_SERVER_PORT
        } if args.predictable else MCS_SERVER_PORT],
        devices=node_disks.get(args.primary_node[0]),
        # A secure cluster needs the ticket to execute the rest of the commands
        # after the cluster starts.
        environment=['MAPR_TICKETFILE_LOCATION=/opt/mapr/conf/mapruserticket']
        if args.secure else [])

    secondary_nodes = [
        Node(hostname=hostname,
             group='secondary',
             image=secondary_node_image,
             devices=node_disks.get(hostname))
        for hostname in args.secondary_nodes
    ]

    cluster = Cluster(primary_node, *secondary_nodes)

    if args.secure:
        secure_config_host_dir = os.path.expanduser(
            args.secure_config_directory)
        volumes = [{secure_config_host_dir: SECURE_CONFIG_CONTAINER_DIR}]
        for node in cluster.nodes:
            node.volumes.extend(volumes)

    # MapR versions 6.0.0 onwards use CentOS 7, which needs the following settings.
    mapr_version_tuple = tuple(int(i) for i in args.mapr_version.split('.'))
    if mapr_version_tuple >= EARLIEST_MAPR_VERSION_WITH_LICENSE_AND_CENTOS_7:
        for node in cluster.nodes:
            node.volumes.append({'/sys/fs/cgroup': '/sys/fs/cgroup'})
            temp_dir_name = tempfile.mkdtemp()
            logger.debug('Created temporary directory %s', temp_dir_name)
            node.volumes.append({temp_dir_name: '/run'})
    cluster.primary_node = primary_node
    cluster.start(args.network, pull_images=args.always_pull)

    logger.info('Generating new UUIDs ...')
    cluster.execute('/opt/mapr/server/mruuidgen > /opt/mapr/hostid')
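    # Every container starts from the same prebuilt image, so each node regenerates its
    # MapR host id to avoid duplicate-hostid clashes within the cluster.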

    if not args.secure:
        logger.info('Configuring the cluster ...')
        for node in cluster:
            configure_command = (
                '/opt/mapr/server/configure.sh -C {0} -Z {0} -RM {0} -HS {0} '
                '-u mapr -g mapr -D {1}'.format(
                    primary_node.fqdn,
                    ','.join(node_disks.get(node.hostname))))
            node.execute("bash -c '{}'".format(configure_command))
    else:
        logger.info('Configuring native security for the cluster ...')
        configure_command = (
            '/opt/mapr/server/configure.sh -secure -genkeys -C {0} -Z {0} -RM {0} -HS {0} '
            '-u mapr -g mapr -D {1}'.format(
                primary_node.fqdn,
                ','.join(node_disks.get(primary_node.hostname))))
        source_files = [
            '{}/{}'.format(MAPR_CONFIG_DIR, file) for file in SECURE_FILES
        ]
        commands = [
            configure_command,
            'chmod 600 {}/{}'.format(MAPR_CONFIG_DIR, SSL_KEYSTORE_FILE),
            'cp -f {src} {dest_dir}'.format(
                src=' '.join(source_files),
                dest_dir=SECURE_CONFIG_CONTAINER_DIR)
        ]
        primary_node.execute(' && '.join(commands))
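        # The primary node generated the keys above (-genkeys) and copied them into the
        # shared SECURE_CONFIG_CONTAINER_DIR volume; each secondary node now copies them
        # back into MAPR_CONFIG_DIR before running configure.sh without -genkeys.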
        for node in secondary_nodes:
            source_files = [
                '{}/{}'.format(SECURE_CONFIG_CONTAINER_DIR, file)
                for file in SECURE_FILES
            ]
            configure_command = (
                '/opt/mapr/server/configure.sh -secure -C {0} -Z {0} -RM {0} -HS {0} '
                '-u mapr -g mapr -D {1}'.format(
                    primary_node.fqdn,
                    ','.join(node_disks.get(node.hostname))))
            commands = [
                'cp -f {src} {dest_dir}'.format(src=' '.join(source_files),
                                                dest_dir=MAPR_CONFIG_DIR),
                configure_command
            ]
            node.execute(' && '.join(commands))

    logger.info('Waiting for MapR Control System server to come online ...')

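    # wait_for_condition polls the callbacks below: a plain TCP connect against the MCS
    # port decides readiness, success logs the elapsed time, and failure raises a
    # TimeoutError once the timeout is exhausted.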
    def condition(address, port):
        return socket().connect_ex((address, port)) == 0

    def success(time):
        logger.info('MapR Control System server is online after %s seconds.',
                    time)

    def failure(timeout):
        raise TimeoutError(
            'Timed out after {} seconds waiting '
            'for MapR Control System server to come online.'.format(timeout))

    wait_for_condition(
        condition=condition,
        condition_args=[primary_node.ip_address, MCS_SERVER_PORT],
        time_between_checks=3,
        timeout=180,
        success=success,
        failure=failure)
    mcs_server_host_port = primary_node.host_ports.get(MCS_SERVER_PORT)

    logger.info('Creating /apps/spark directory on %s ...',
                primary_node.hostname)
    spark_directory_command = [
        'hadoop fs -mkdir -p /apps/spark', 'hadoop fs -chmod 777 /apps/spark'
    ]
    primary_node.execute("bash -c '{}'".format(
        '; '.join(spark_directory_command)))

    logger.info('Creating MapR sample Stream named /sample-stream on %s ...',
                primary_node.hostname)
    primary_node.execute('maprcli stream create -path /sample-stream '
                         '-produceperm p -consumeperm p -topicperm p')

    if mapr_version_tuple >= EARLIEST_MAPR_VERSION_WITH_LICENSE_AND_CENTOS_7 and args.license_url:
        license_commands = [
            'curl --user {} {} > /tmp/lic'.format(args.license_credentials,
                                                  args.license_url),
            '/opt/mapr/bin/maprcli license add -license /tmp/lic -is_file true',
            'rm -rf /tmp/lic'
        ]
        logger.info('Applying license ...')
        primary_node.execute(' && '.join(license_commands))

    if not args.dont_register_gateway:
        logger.info('Registering gateway with the cluster ...')
        register_gateway_commands = [
            "cat /opt/mapr/conf/mapr-clusters.conf | egrep -o '^[^ ]* '"
            ' > /tmp/cluster-name',
            'maprcli cluster gateway set -dstcluster $(cat '
            '/tmp/cluster-name) -gateways {}'.format(primary_node.fqdn),
            'rm /tmp/cluster-name'
        ]
        primary_node.execute(' && '.join(register_gateway_commands))

    logger.info(
        'MapR Control System server is now accessible at https://%s:%s',
        getfqdn(), mcs_server_host_port)
Example No. 10
def main(args):
    image_prefix = '{}/{}/topology_hdp:hdp{}_ambari{}'.format(
        args.registry, args.namespace or DEFAULT_NAMESPACE, args.hdp_version,
        args.ambari_version)
    primary_node_image = '{}_{}'.format(image_prefix, 'primary-node')
    secondary_node_image = '{}_{}'.format(image_prefix, 'secondary-node')

    primary_node = Node(hostname=args.primary_node[0],
                        group='primary',
                        image=primary_node_image,
                        ports=[{
                            AMBARI_PORT: AMBARI_PORT
                        } if args.predictable else AMBARI_PORT])

    secondary_nodes = [
        Node(hostname=hostname, group='secondary', image=secondary_node_image)
        for hostname in args.secondary_nodes
    ]

    cluster = Cluster(primary_node, *secondary_nodes)
    cluster.primary_node = primary_node
    cluster.secondary_nodes = secondary_nodes
    cluster.start(args.network)

    logger.debug('Starting PostgreSQL for Ambari server ...')
    primary_node.execute('service postgresql start', quiet=not args.verbose)
    _update_node_names(cluster, quiet=not args.verbose)

    # The HDP topology uses two pre-built images ('primary' and 'secondary'). If a cluster
    # larger than 2 nodes is started, some modifications need to be done.
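    # In particular, each extra secondary node is a clone of the same image, so its HDFS
    # DataNode directory would otherwise carry duplicate metadata copied from that image;
    # wiping it below lets the clone register with a fresh identity.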
    if len(secondary_nodes) > 1:
        _remove_files(nodes=secondary_nodes[1:],
                      files=['/hadoop/hdfs/data/current/*'])

    logger.info('Starting Ambari server ...')
    primary_node.execute('ambari-server start', quiet=not args.verbose)

    # Docker for Mac exposes ports that can be accessed only with ``localhost:<port>`` so
    # use that instead of the hostname if the host name is ``moby``.
    hostname = 'localhost' if client.info().get(
        'Name') == 'moby' else socket.gethostname()
    port = cluster.primary_node.host_ports.get(AMBARI_PORT)
    server_url = 'http://{}:{}'.format(hostname, port)
    logger.info('Ambari server is now reachable at %s', server_url)

    logger.info('Starting Ambari agents ...')
    for node in cluster:
        logger.debug('Starting Ambari agent on %s ...', node.fqdn)
        node.execute('ambari-agent start', quiet=not args.verbose)

    ambari = Ambari(server_url, username='******', password='******')

    def condition(ambari, cluster):
        cluster_hosts = {node.fqdn for node in cluster}
        ambari_hosts = {host.host_name for host in ambari.hosts}
        logger.debug('Cluster hosts: %s; Ambari hosts: %s', cluster_hosts,
                     ambari_hosts)
        return cluster_hosts == ambari_hosts

    wait_for_condition(condition=condition, condition_args=[ambari, cluster])

    for node in secondary_nodes[1:]:
        logger.info('Adding %s to cluster ...', node.fqdn)
        ambari.clusters('cluster').hosts.create(node.fqdn)
        for component in ambari.clusters('cluster').hosts(
                secondary_nodes[0].fqdn).components:
            logger.debug('Adding component (%s) to cluster on host (%s) ...',
                         component.component_name, node.fqdn)
            host_components = ambari.clusters('cluster').hosts(
                node.fqdn).components
            host_components.create(component.component_name).wait()

        logger.debug('Installing all registered components on host (%s) ...',
                     node.fqdn)
        ambari.clusters('cluster').hosts(node.fqdn).components.install().wait()

    if not args.dont_start_cluster:
        logger.debug(
            'Waiting for all hosts to reach healthy state before starting cluster ...'
        )

        def condition(ambari):
            health_report = ambari.clusters('cluster').health_report
            logger.debug('Ambari cluster health report: %s ...', health_report)
            return health_report.get('Host/host_state/HEALTHY') == len(
                list(ambari.hosts))

        wait_for_condition(condition=condition, condition_args=[ambari])

        logger.info('Starting cluster services ...')
        ambari.clusters('cluster').services.start().wait()
Example No. 11
def main(args):
    quiet = not args.verbose

    # Image name
    image = '{}/{}/topology_confluent_schema_registry:schema_registry-{}'.format(args.registry,
                                                                                 args.namespace or DEFAULT_NAMESPACE,
                                                                                 args.confluent_version)

    # Nodes in the Kafka cluster
    nodes = [Node(hostname=hostname,
                  group='brokers',
                  ports=[{REST_PORT: REST_PORT}],
                  image=image)
             for hostname in args.nodes]

    cluster = Cluster(*nodes)
    cluster.start(args.network, pull_images=args.always_pull)

    # Create distributed zookeeper configuration
    zookeeper_config = ['tickTime=2000',
                        'dataDir=/zookeeper',
                        'clientPort=2181',
                        'initLimit=5',
                        'syncLimit=2']

    # ZooKeeper server ids (and the matching myid files) conventionally start at 1.
    for idx, node in enumerate(cluster, start=1):
        zookeeper_config.append('server.{}={}:2888:3888'.format(idx, node.hostname))
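    # For a hypothetical three-node cluster (node-1, node-2, node-3), the rendered
    # /zookeeper.properties would come out roughly as:
    #
    #   tickTime=2000
    #   dataDir=/zookeeper
    #   clientPort=2181
    #   initLimit=5
    #   syncLimit=2
    #   server.1=node-1:2888:3888
    #   server.2=node-2:2888:3888
    #   server.3=node-3:2888:3888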

    # Start ZooKeeper on all nodes; each myid must match the server.N index above,
    # so number from 1 again.
    for idx, node in enumerate(cluster, start=1):
        logger.info('Starting ZooKeeper on node %s ...', node.hostname)
        node.execute('mkdir -p /zookeeper')
        node.put_file('/zookeeper/myid', str(idx))
        node.put_file('/zookeeper.properties', '\n'.join(zookeeper_config))
        node.execute('/start_zookeeper &', detach=True)

    # Validate that ZooKeeper is alive on each node
    for node in cluster:
        logger.info('Validating Zookeeper on node %s', node.hostname)
        wait_for_condition(condition=validate_zookeeper,
                           condition_args=[node, quiet],
                           time_between_checks=3,
                           timeout=60,
                           success=success,
                           failure=failure)

    # Start all brokers
    for idx, node in enumerate(cluster):
        logger.info('Starting Kafka on node %s ...', node.hostname)

        kafka_config = node.get_file('/confluent/etc/kafka/server.properties')
        kafka_config = kafka_config.replace('broker.id=0', 'broker.id={}'.format(idx))
        node.put_file('/kafka.properties', kafka_config)
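        # broker.id is the only per-node change made here; /start_kafka is assumed to
        # pick up the rewritten /kafka.properties when it launches the broker.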

        node.execute('/start_kafka &', detach=True)

    # Verify that all Kafka brokers are up
    logger.info('Waiting for all brokers to register in ZooKeeper ...')
    wait_for_condition(condition=validate_kafka,
                       condition_args=[nodes[0], len(nodes), quiet],
                       time_between_checks=3,
                       timeout=60,
                       success=success,
                       failure=failure)

    # Start Schema Registry on all nodes
    for node in cluster:
        logger.info('Starting Schema Registry on node %s ...', node.hostname)
        node.execute('/start_schema_registry &', detach=True)
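    # A quick check once the registries come up might be (hypothetical host name; assumes
    # REST_PORT is the Schema Registry REST listener mapped above):
    #
    #   curl http://node-1.cluster:<REST_PORT>/subjects
    #
    # which should return an empty JSON list ([]) on a fresh cluster.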