Example #1
def start(args):
    image = "{0}/{1}/clusterdock:{2}_nodebase".format(args.registry_url,
                                                      args.namespace or DEFAULT_CLOUDERA_NAMESPACE,
                                                      args.operating_system)
    if args.always_pull or not is_image_available_locally(image):
        pull_image(image)

    node_groups = [NodeGroup(name='nodes', nodes=[Node(hostname=hostname, network=args.network,
                                                       image=image)
                                                  for hostname in args.nodes])]
    cluster = Cluster(topology='nodebase', node_groups=node_groups, network_name=args.network)
    cluster.start()
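For reference, here is a minimal sketch of the image name that the format string above produces. The registry URL, namespace, and operating system values below are placeholders, not values taken from the source.

# Sketch only: placeholder values standing in for args.registry_url,
# DEFAULT_CLOUDERA_NAMESPACE, and args.operating_system.
registry_url = 'docker.io'
namespace = 'cloudera'
operating_system = 'centos6.6'

image = "{0}/{1}/clusterdock:{2}_nodebase".format(registry_url, namespace, operating_system)
print(image)  # docker.io/cloudera/clusterdock:centos6.6_nodebase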
Example #2
def start(args):
    image = "{0}/{1}/clusterdock:{2}_nodebase".format(
        args.registry_url, args.namespace or DEFAULT_CLOUDERA_NAMESPACE,
        args.operating_system)
    if args.always_pull or not is_image_available_locally(image):
        pull_image(image)

    node_groups = [
        NodeGroup(name='nodes',
                  nodes=[
                      Node(hostname=hostname,
                           network=args.network,
                           image=image,
                           volumes=[]) for hostname in args.nodes
                  ])
    ]
    cluster = Cluster(node_groups=node_groups, network_name=args.network)
    cluster.start()
Example #3
def start(args):
    primary_node_image = "{0}/{1}/clusterdock:{2}_{3}_primary-node".format(
        args.registry_url, args.namespace or DEFAULT_CLOUDERA_NAMESPACE,
        args.cdh_string, args.cm_string
    )

    secondary_node_image = "{0}/{1}/clusterdock:{2}_{3}_secondary-node".format(
        args.registry_url, args.namespace or DEFAULT_CLOUDERA_NAMESPACE,
        args.cdh_string, args.cm_string
    )

    for image in [primary_node_image, secondary_node_image]:
        if args.always_pull or not is_image_available_locally(image):
            logger.info("Pulling image %s. This might take a little while...", image)
            pull_image(image)

    CM_SERVER_PORT = 7180
    HUE_SERVER_PORT = 8888

    primary_node = Node(hostname=args.primary_node[0], network=args.network,
                        image=primary_node_image, ports=[CM_SERVER_PORT, HUE_SERVER_PORT])

    secondary_nodes = [Node(hostname=hostname, network=args.network, image=secondary_node_image)
                       for hostname in args.secondary_nodes]

    secondary_node_group = NodeGroup(name='secondary', nodes=secondary_nodes)
    node_groups = [NodeGroup(name='primary', nodes=[primary_node]),
                   secondary_node_group]

    cluster = Cluster(topology='cdh', node_groups=node_groups, network_name=args.network)
    cluster.start()

    '''
    A hack is needed here. In short, Docker mounts a number of files from the host into
    the container (and so do we). As such, when CM runs 'mount' inside of the containers
    during setup, it sees these ext4 files as suitable places in which to install things.
    Unfortunately, CM doesn't have a blacklist to ignore filesystem types and only including
    our containers' filesystem in the agents' config.ini whitelist is insufficient, since CM
    merges that list with the contents of /proc/filesystems. To work around this, we copy
    the culprit files inside of the container, which creates those files in aufs. We then
    unmount the volumes within the container and then move the files back to their original
    locations. By doing this, we preserve the contents of the files (which is necessary for
    things like networking to work properly) and keep CM happy.
    '''
    filesystem_fix_commands = []
    for file in ['/etc/hosts', '/etc/resolv.conf', '/etc/hostname', '/etc/localtime']:
        filesystem_fix_commands.append("cp {0} {0}.1; umount {0}; mv {0}.1 {0};".format(file))
    filesystem_fix_command = ' '.join(filesystem_fix_commands)
    cluster.ssh(filesystem_fix_command)

    change_cm_server_host(cluster, primary_node.fqdn)
    if len(secondary_nodes) > 1:
        additional_nodes = secondary_nodes[1:]
        remove_files(cluster, files=['/var/lib/cloudera-scm-agent/uuid',
                                     '/dfs*/dn/current/*'],
                     nodes=additional_nodes)

    # Restarting the CM agents occasionally fails for reasons we haven't yet pinned down. Keep
    # going if that happens while we work on reproducing the problem.
    try:
        restart_cm_agents(cluster)
    except Exception:
        pass

    logger.info('Waiting for Cloudera Manager server to come online...')
    cm_server_startup_time = wait_for_port_open(primary_node.ip_address,
                                                CM_SERVER_PORT, timeout_sec=180)
    logger.info("Detected Cloudera Manager server after %.2f seconds.", cm_server_startup_time)
    cm_server_web_ui_host_port = get_host_port_binding(primary_node.container_id,
                                                       CM_SERVER_PORT)

    logger.info("CM server is now accessible at http://%s:%s",
                getfqdn(), cm_server_web_ui_host_port)

    deployment = ClouderaManagerDeployment(cm_server_address=primary_node.ip_address)
    deployment.setup_api_resources()

    if len(cluster) > 2:
        deployment.add_hosts_to_cluster(secondary_node_fqdn=secondary_nodes[0].fqdn,
                                        all_fqdns=[node.fqdn for node in cluster])

    deployment.update_database_configs()
    deployment.update_hive_metastore_namenodes()

    if args.include_service_types:
        # CM maintains service types in CAPS, so make sure our args.include_service_types list
        # follows the same convention.
        service_types_to_leave = args.include_service_types.upper().split(',')
        for service in deployment.cluster.get_all_services():
            if service.type not in service_types_to_leave:
                logger.info('Removing service %s from %s...', service.name, deployment.cluster.displayName)
                deployment.cluster.delete_service(service.name)
    elif args.exclude_service_types:
        service_types_to_remove = args.exclude_service_types.upper().split(',')
        for service in deployment.cluster.get_all_services():
            if service.type in service_types_to_remove:
                logger.info('Removing service %s from %s...', service.name, deployment.cluster.displayName)
                deployment.cluster.delete_service(service.name)

    hue_server_host_port = get_host_port_binding(primary_node.container_id, HUE_SERVER_PORT)
    for service in deployment.cluster.get_all_services():
        if service.type == 'HUE':
            logger.info("Once its service starts, Hue server will be accessible at http://%s:%s",
                        getfqdn(), hue_server_host_port)
            break

    logger.info("Deploying client configuration...")
    deployment.cluster.deploy_client_config().wait()

    if not args.dont_start_cluster:
        logger.info('Starting cluster...')
        if not deployment.cluster.start().wait().success:
            raise Exception('Failed to start cluster.')
        logger.info('Starting Cloudera Management service...')
        if not deployment.cm.get_service().start().wait().success:
            raise Exception('Failed to start Cloudera Management service.')

        deployment.validate_services_started()

    logger.info("We'd love to know what you think of our CDH topology for clusterdock! Please "
                "direct any feedback to our community forum at "
                "http://tiny.cloudera.com/hadoop-101-forum.")
Example #4
def build(args):
    """This function will be executed when ./bin/build_cluster apache_hbase is invoked."""

    # pylint: disable=too-many-locals
    # See start function above for rationale for disabling this warning.

    container_build_dir = join(CLUSTERDOCK_VOLUME, str(uuid4()))
    makedirs(container_build_dir)

    # If --hbase-git-commit is specified, we build HBase from source.
    if args.hbase_git_commit:
        build_hbase_commands = [
            "git clone https://github.com/apache/hbase.git {0}".format(
                container_build_dir),
            "git -C {0} checkout {1}".format(container_build_dir,
                                             args.hbase_git_commit),
            "mvn --batch-mode clean install -DskipTests assembly:single -f {0}/pom.xml"
            .format(container_build_dir)
        ]

        maven_image = Constants.docker_images.maven  # pylint: disable=no-member
        if not is_image_available_locally(maven_image):
            pull_image(maven_image)

        container_configs = {
            'command':
            'bash -c "{0}"'.format(' && '.join(build_hbase_commands)),
            'image':
            maven_image,
            'host_config':
            client.create_host_config(
                volumes_from=get_clusterdock_container_id())
        }

        maven_container_id = client.create_container(**container_configs)['Id']
        client.start(container=maven_container_id)
        for line in client.logs(container=maven_container_id, stream=True):
            stdout.write(line)
            stdout.flush()

        # Mimic docker run --rm by blocking on docker wait and then removing the container
        # if it encountered no errors.
        if client.wait(container=maven_container_id) == EX_OK:
            client.remove_container(container=maven_container_id, force=True)
        else:
            raise Exception('Error encountered while building HBase.')

        assembly_target_dir = join(container_build_dir, 'hbase-assembly',
                                   'target')
        for a_file in listdir(assembly_target_dir):
            if a_file.endswith('bin.tar.gz'):
                args.hbase_tarball = join(assembly_target_dir, a_file)
                break

    # Download all the binary tarballs into our temporary directory so that we can add them
    # into the Docker image we're building.
    filenames = []
    for tarball_location in [
            args.java_tarball, args.hadoop_tarball, args.hbase_tarball
    ]:
        tarball_filename = tarball_location.rsplit('/', 1)[-1]
        filenames.append(tarball_filename)

        # Download tarballs given as URLs.
        if container_build_dir not in tarball_location:
            get_request = requests.get(
                tarball_location,
                stream=True,
                cookies=({
                    'oraclelicense': 'accept-securebackup-cookie'
                } if tarball_location == args.java_tarball else None))
            # Raise Exception if download failed.
            get_request.raise_for_status()
            logger.info("Downloading %s...", tarball_filename)
            with open(join(container_build_dir, tarball_filename),
                      'wb') as file_descriptor:
                for chunk in get_request.iter_content(1024):
                    file_descriptor.write(chunk)
        else:
            move(tarball_location, container_build_dir)

    dockerfile_contents = r"""
    FROM {nodebase_image}
    COPY {java_tarball} /tarballs/
    RUN mkdir /java && tar -xf /tarballs/{java_tarball} -C /java --strip-components=1
    RUN echo "JAVA_HOME=/java" >> /etc/environment

    COPY {hadoop_tarball} /tarballs/
    RUN mkdir /hadoop && tar -xf /tarballs/{hadoop_tarball} -C /hadoop --strip-components=1
    COPY {hbase_tarball} /tarballs/
    RUN mkdir /hbase && tar -xf /tarballs/{hbase_tarball} -C /hbase --strip-components=1

    # Remove tarballs folder.
    RUN rm -rf /tarballs

    # Set PATH explicitly.
    RUN echo "PATH=/java/bin:/hadoop/bin:/hbase/bin/:$(echo $PATH)" >> /etc/environment

    # Add hbase user and group before copying root's SSH keys over.
    RUN groupadd hbase \
        && useradd -g hbase hbase \
        && cp -R /root/.ssh ~hbase \
        && chown -R hbase:hbase ~hbase/.ssh

    # Disable requiretty in /etc/sudoers as required by HBase chaos monkey.
    RUN sed -i 's/Defaults\s*requiretty/#&/' /etc/sudoers
    """.format(nodebase_image='/'.join([
        item for item in [
            args.registry_url, args.namespace or DEFAULT_APACHE_NAMESPACE,
            "clusterdock:{os}_nodebase".format(os=args.operating_system)
        ] if item
    ]),
               java_tarball=filenames[0],
               hadoop_tarball=filenames[1],
               hbase_tarball=filenames[2])

    logger.info("Created Dockerfile: %s", dockerfile_contents)

    with open(join(container_build_dir, 'Dockerfile'), 'w') as dockerfile:
        dockerfile.write(dockerfile_contents)

    image = '/'.join([
        item for item in [
            args.registry_url, args.namespace or DEFAULT_APACHE_NAMESPACE,
            "clusterdock:{os}_java-{java}_hadoop-{hadoop}_hbase-{hbase}".
            format(os=args.operating_system,
                   java=args.java_version,
                   hadoop=args.hadoop_version,
                   hbase=args.hbase_version)
        ] if item
    ])

    logger.info("Building image %s...", image)
    build_image(dockerfile=join(container_build_dir, 'Dockerfile'), tag=image)

    logger.info("Removing build temporary directory...")
    return [image]
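One detail worth spelling out is the '/'.join(...) pattern with an "if item" filter used to build image names: the filter drops empty components, so omitting a registry URL does not leave a leading slash. A small sketch with placeholder values (the registry, namespace, and operating system strings here are assumptions, not values from the source):

# Sketch: how empty components are dropped from the joined image name.
registry_url = ''                 # e.g. no private registry configured
namespace = 'apache'              # hypothetical stand-in for DEFAULT_APACHE_NAMESPACE
nodebase = "clusterdock:{os}_nodebase".format(os='centos6.6')  # hypothetical OS string

nodebase_image = '/'.join([item for item in [registry_url, namespace, nodebase] if item])
print(nodebase_image)  # apache/clusterdock:centos6.6_nodebase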
Example #5
def start(args):
    """This function will be executed when ./bin/start_cluster apache_hbase is invoked."""

    # pylint: disable=too-many-locals
    # Pylint doesn't want more than 15 local variables in a function; this one has 17. This is about
    # as low as I want to go because, while I can cheat and stuff unrelated things in a dictionary,
    # that won't improve readability.

    uuid = str(uuid4())
    container_cluster_config_dir = join(CLUSTERDOCK_VOLUME, uuid, 'config')
    makedirs(container_cluster_config_dir)

    for mount in client.inspect_container(
            get_clusterdock_container_id())['Mounts']:
        if mount['Destination'] == CLUSTERDOCK_VOLUME:
            host_cluster_config_dir = join(mount['Source'], uuid, 'config')
            break
    else:
        raise Exception(
            "Could not find source of {0} mount.".format(CLUSTERDOCK_VOLUME))

    # CLUSTERDOCK_VOLUME/uuid/config in the clusterdock container corresponds to
    # host_cluster_config_dir on the Docker host.
    logger.debug("Creating directory for cluster configuration files in %s...",
                 host_cluster_config_dir)

    # Generate the image name to use from the command line arguments passed in.
    image = '/'.join([
        item for item in [
            args.registry_url, args.namespace or DEFAULT_APACHE_NAMESPACE,
            "clusterdock:{os}_java-{java}_hadoop-{hadoop}_hbase-{hbase}".
            format(os=args.operating_system,
                   java=args.java_version,
                   hadoop=args.hadoop_version,
                   hbase=args.hbase_version)
        ] if item
    ])
    if args.always_pull or not is_image_available_locally(image):
        pull_image(image)

    # Before starting the cluster, we create a throwaway container from which we copy
    # configuration files back to the host. We also use this container to run an HBase
    # command that returns the port of the HBase master web UI. Since we aren't running init here,
    # we also have to manually pass in JAVA_HOME as an environment variable.
    get_hbase_web_ui_port_command = (
        '/hbase/bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool '
        'hbase.master.info.port')
    container_id = client.create_container(
        image=image,
        command=get_hbase_web_ui_port_command,
        environment={'JAVA_HOME': '/java'})['Id']
    logger.debug(
        "Created temporary container (id: %s) from which to copy configuration files.",
        container_id)

    # Actually do the copying of Hadoop configs...
    _copy_container_folder_to_host(
        container_id, '/hadoop/etc/hadoop',
        join(container_cluster_config_dir, 'hadoop'),
        join(host_cluster_config_dir, 'hadoop'))

    # ... and repeat for HBase configs.
    _copy_container_folder_to_host(container_id, '/hbase/conf',
                                   join(container_cluster_config_dir, 'hbase'),
                                   join(host_cluster_config_dir, 'hbase'))

    logger.info(
        "The /hbase/lib folder on containers in the cluster will be volume mounted "
        "into %s...", join(host_cluster_config_dir, 'hbase-lib'))
    _copy_container_folder_to_host(
        container_id, '/hbase/lib',
        join(container_cluster_config_dir, 'hbase-lib'),
        join(host_cluster_config_dir, 'hbase-lib'))

    # Every node in the cluster will have a shared volume mount from the host for Hadoop and HBase
    # configuration files as well as the HBase lib folder.
    shared_volumes = [{
        join(host_cluster_config_dir, 'hadoop'):
        '/hadoop/etc/hadoop'
    }, {
        join(host_cluster_config_dir, 'hbase'): '/hbase/conf'
    }, {
        join(host_cluster_config_dir, 'hbase-lib'): '/hbase/lib'
    }]

    # Get the HBase master web UI port, stripping the newline the Docker REST API gives us.
    client.start(container=container_id)
    if client.wait(container=container_id) == EX_OK:
        hbase_master_web_ui_port = client.logs(container=container_id).rstrip()
        client.remove_container(container=container_id, force=True)
    else:
        raise Exception('HBase configuration container exited with a non-zero status.')

    # Create the Node objects. These hold the state of our container nodes and will be started
    # at Cluster instantiation time.
    primary_node = Node(hostname=args.primary_node[0],
                        network=args.network,
                        image=image,
                        ports=[
                            NAMENODE_WEB_UI_PORT, hbase_master_web_ui_port,
                            RESOURCEMANAGER_WEB_UI_PORT, HBASE_REST_SERVER_PORT
                        ],
                        volumes=shared_volumes)
    secondary_nodes = []
    for hostname in args.secondary_nodes:
        # A list of service directories is used to name folders on the host and, with an index
        # appended, folders in the container as well (e.g. /data1/node-1/dfs:/dfs1).
        service_directories = ['dfs', 'yarn']

        # Every Node will have shared_volumes to let one set of configs on the host be propagated
        # to every container. If --data-directories is specified, this will be appended to allow
        # containers to use multiple disks on the host.
        volumes = shared_volumes[:]
        if args.data_directories:
            data_directories = args.data_directories.split(',')
            volumes += [{
                join(data_directory, uuid, hostname, service_directory):
                "/{0}{1}".format(service_directory, i)
            } for i, data_directory in enumerate(data_directories, start=1)
                        for service_directory in service_directories]
        secondary_nodes.append(
            Node(hostname=hostname,
                 network=args.network,
                 image=image,
                 volumes=volumes))

    Cluster(topology='apache_hbase',
            node_groups=[
                NodeGroup(name='primary', nodes=[primary_node]),
                NodeGroup(name='secondary', nodes=secondary_nodes)
            ],
            network_name=args.network).start()

    # When creating configs, pass a dictionary of wildcards into _create_configs_from_file
    # to transform placeholders in the configurations.cfg file into real values.
    _create_configs_from_file(filename=args.configurations,
                              cluster_config_dir=container_cluster_config_dir,
                              wildcards={
                                  "primary_node": args.primary_node,
                                  "secondary_nodes": args.secondary_nodes,
                                  "all_nodes":
                                  args.primary_node + args.secondary_nodes,
                                  "network": args.network
                              })

    # After creating configurations from the configurations.cfg file, update hdfs-site.xml and
    # yarn-site.xml to use the data directories passed on the command line.
    if args.data_directories:
        _update_config_for_data_dirs(
            container_cluster_config_dir=container_cluster_config_dir,
            data_directories=data_directories)

    if not args.dont_start_services:
        _start_services(primary_node,
                        hbase_master_web_ui_port=hbase_master_web_ui_port)
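To clarify the nested comprehension that builds per-node data volumes when --data-directories is given, here is a sketch of its output for placeholder values; the paths, uuid, and hostname below are assumptions, not values from the source.

from os.path import join

# Sketch: extra volume mappings for one secondary node, using placeholder values.
data_directories = '/data1,/data2'.split(',')   # hypothetical --data-directories value
uuid = 'abc123'                                 # placeholder for the generated uuid
hostname = 'node-1'                             # placeholder secondary node hostname
service_directories = ['dfs', 'yarn']

volumes = [{join(data_directory, uuid, hostname, service_directory):
            "/{0}{1}".format(service_directory, i)}
           for i, data_directory in enumerate(data_directories, start=1)
           for service_directory in service_directories]
print(volumes)
# [{'/data1/abc123/node-1/dfs': '/dfs1'}, {'/data1/abc123/node-1/yarn': '/yarn1'},
#  {'/data2/abc123/node-1/dfs': '/dfs2'}, {'/data2/abc123/node-1/yarn': '/yarn2'}]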
Example #6
def build(args):
    """This function will be executed when ./bin/build_cluster apache_hbase is invoked."""

    # pylint: disable=too-many-locals
    # See start function above for rationale for disabling this warning.

    container_build_dir = join(CLUSTERDOCK_VOLUME, str(uuid4()))
    makedirs(container_build_dir)

    # If --hbase-git-commit is specified, we build HBase from source.
    if args.hbase_git_commit:
        build_hbase_commands = [
            "git clone https://github.com/apache/hbase.git {0}".format(container_build_dir),
            "git -C {0} checkout {1}".format(container_build_dir, args.hbase_git_commit),
            "mvn --batch-mode clean install -DskipTests assembly:single -f {0}/pom.xml".format(
                container_build_dir
            )
        ]

        maven_image = Constants.docker_images.maven # pylint: disable=no-member
        if not is_image_available_locally(maven_image):
            pull_image(maven_image)

        container_configs = {
            'command': 'bash -c "{0}"'.format(' && '.join(build_hbase_commands)),
            'image': maven_image,
            'host_config': client.create_host_config(volumes_from=get_clusterdock_container_id())
        }

        maven_container_id = client.create_container(**container_configs)['Id']
        client.start(container=maven_container_id)
        for line in client.logs(container=maven_container_id, stream=True):
            stdout.write(line)
            stdout.flush()

        # Mimic docker run --rm by blocking on docker wait and then removing the container
        # if it encountered no errors.
        if client.wait(container=maven_container_id) == EX_OK:
            client.remove_container(container=maven_container_id, force=True)
        else:
            raise Exception('Error encountered while building HBase.')

        assembly_target_dir = join(container_build_dir, 'hbase-assembly', 'target')
        for a_file in listdir(assembly_target_dir):
            if a_file.endswith('bin.tar.gz'):
                args.hbase_tarball = join(assembly_target_dir, a_file)
                break

    # Download all the binary tarballs into our temporary directory so that we can add them
    # into the Docker image we're building.
    filenames = []
    for tarball_location in [args.java_tarball, args.hadoop_tarball, args.hbase_tarball]:
        tarball_filename = tarball_location.rsplit('/', 1)[-1]
        filenames.append(tarball_filename)

        # Download tarballs given as URLs.
        if container_build_dir not in tarball_location:
            get_request = requests.get(tarball_location, stream=True, cookies=(
                {'oraclelicense': 'accept-securebackup-cookie'}
                if tarball_location == args.java_tarball
                else None
            ))
            # Raise Exception if download failed.
            get_request.raise_for_status()
            logger.info("Downloading %s...", tarball_filename)
            with open(join(container_build_dir, tarball_filename), 'wb') as file_descriptor:
                for chunk in get_request.iter_content(1024):
                    file_descriptor.write(chunk)
        else:
            move(tarball_location, container_build_dir)

    dockerfile_contents = r"""
    FROM {nodebase_image}
    COPY {java_tarball} /tarballs/
    RUN mkdir /java && tar -xf /tarballs/{java_tarball} -C /java --strip-components=1
    RUN echo "JAVA_HOME=/java" >> /etc/environment

    COPY {hadoop_tarball} /tarballs/
    RUN mkdir /hadoop && tar -xf /tarballs/{hadoop_tarball} -C /hadoop --strip-components=1
    COPY {hbase_tarball} /tarballs/
    RUN mkdir /hbase && tar -xf /tarballs/{hbase_tarball} -C /hbase --strip-components=1

    # Remove tarballs folder.
    RUN rm -rf /tarballs

    # Set PATH explicitly.
    RUN echo "PATH=/java/bin:/hadoop/bin:/hbase/bin/:$(echo $PATH)" >> /etc/environment

    # Add hbase user and group before copying root's SSH keys over.
    RUN groupadd hbase \
        && useradd -g hbase hbase \
        && cp -R /root/.ssh ~hbase \
        && chown -R hbase:hbase ~hbase/.ssh

    # Disable requiretty in /etc/sudoers as required by HBase chaos monkey.
    RUN sed -i 's/Defaults\s*requiretty/#&/' /etc/sudoers
    """.format(nodebase_image='/'.join([item
                                        for item in [args.registry_url,
                                                     args.namespace or DEFAULT_APACHE_NAMESPACE,
                                                     "clusterdock:{os}_nodebase".format(
                                                         os=args.operating_system
                                                     )]
                                        if item]),
               java_tarball=filenames[0], hadoop_tarball=filenames[1], hbase_tarball=filenames[2])

    logger.info("Created Dockerfile: %s", dockerfile_contents)

    with open(join(container_build_dir, 'Dockerfile'), 'w') as dockerfile:
        dockerfile.write(dockerfile_contents)

    image = '/'.join(
        [item
         for item in [args.registry_url, args.namespace or DEFAULT_APACHE_NAMESPACE,
                      "clusterdock:{os}_java-{java}_hadoop-{hadoop}_hbase-{hbase}".format(
                          os=args.operating_system, java=args.java_version,
                          hadoop=args.hadoop_version, hbase=args.hbase_version
                      )]
         if item])

    logger.info("Building image %s...", image)
    build_image(dockerfile=join(container_build_dir, 'Dockerfile'), tag=image)

    logger.info("Removing build temporary directory...")
    return [image]
Example #7
def start(args):
    """This function will be executed when ./bin/start_cluster apache_hbase is invoked."""

    # pylint: disable=too-many-locals
    # Pylint doesn't want more than 15 local variables in a function; this one has 17. This is about
    # as low as I want to go because, while I can cheat and stuff unrelated things in a dictionary,
    # that won't improve readability.

    uuid = str(uuid4())
    container_cluster_config_dir = join(CLUSTERDOCK_VOLUME, uuid, 'config')
    makedirs(container_cluster_config_dir)

    for mount in client.inspect_container(get_clusterdock_container_id())['Mounts']:
        if mount['Destination'] == CLUSTERDOCK_VOLUME:
            host_cluster_config_dir = join(mount['Source'], uuid, 'config')
            break
    else:
        raise Exception("Could not find source of {0} mount.".format(CLUSTERDOCK_VOLUME))

    # CLUSTERDOCK_VOLUME/uuid/config in the clusterdock container corresponds to
    # host_cluster_config_dir on the Docker host.
    logger.debug("Creating directory for cluster configuration files in %s...",
                 host_cluster_config_dir)

    # Generate the image name to use from the command line arguments passed in.
    image = '/'.join(
        [item
         for item in [args.registry_url, args.namespace or DEFAULT_APACHE_NAMESPACE,
                      "clusterdock:{os}_java-{java}_hadoop-{hadoop}_hbase-{hbase}".format(
                          os=args.operating_system, java=args.java_version,
                          hadoop=args.hadoop_version, hbase=args.hbase_version
                      )]
         if item]
    )
    if args.always_pull or not is_image_available_locally(image):
        pull_image(image)

    # Before starting the cluster, we create a throwaway container from which we copy
    # configuration files back to the host. We also use this container to run an HBase
    # command that returns the port of the HBase master web UI. Since we aren't running init here,
    # we also have to manually pass in JAVA_HOME as an environment variable.
    get_hbase_web_ui_port_command = ('/hbase/bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool '
                                     'hbase.master.info.port')
    container_id = client.create_container(image=image, command=get_hbase_web_ui_port_command,
                                           environment={'JAVA_HOME': '/java'})['Id']
    logger.debug("Created temporary container (id: %s) from which to copy configuration files.",
                 container_id)

    # Actually do the copying of Hadoop configs...
    _copy_container_folder_to_host(container_id, '/hadoop/etc/hadoop',
                                   join(container_cluster_config_dir, 'hadoop'),
                                   join(host_cluster_config_dir, 'hadoop'))

    # ... and repeat for HBase configs.
    _copy_container_folder_to_host(container_id, '/hbase/conf',
                                   join(container_cluster_config_dir, 'hbase'),
                                   join(host_cluster_config_dir, 'hbase'))

    logger.info("The /hbase/lib folder on containers in the cluster will be volume mounted "
                "into %s...", join(host_cluster_config_dir, 'hbase-lib'))
    _copy_container_folder_to_host(container_id, '/hbase/lib',
                                   join(container_cluster_config_dir, 'hbase-lib'),
                                   join(host_cluster_config_dir, 'hbase-lib'))

    # Every node in the cluster will have a shared volume mount from the host for Hadoop and HBase
    # configuration files as well as the HBase lib folder.
    shared_volumes = [{join(host_cluster_config_dir, 'hadoop'): '/hadoop/etc/hadoop'},
                      {join(host_cluster_config_dir, 'hbase'): '/hbase/conf'},
                      {join(host_cluster_config_dir, 'hbase-lib'): '/hbase/lib'}]

    # Get the HBase master web UI port, stripping the newline the Docker REST API gives us.
    client.start(container=container_id)
    if client.wait(container=container_id) == EX_OK:
        hbase_master_web_ui_port = client.logs(container=container_id).rstrip()
        client.remove_container(container=container_id, force=True)
    else:
        raise Exception('HBase configuration container exited with a non-zero status.')

    # Create the Node objects. These hold the state of our container nodes and will be started
    # at Cluster instantiation time.
    primary_node = Node(hostname=args.primary_node[0], network=args.network,
                        image=image, ports=[NAMENODE_WEB_UI_PORT,
                                            hbase_master_web_ui_port,
                                            RESOURCEMANAGER_WEB_UI_PORT,
                                            HBASE_REST_SERVER_PORT],
                        volumes=shared_volumes)
    secondary_nodes = []
    for hostname in args.secondary_nodes:
        # A list of service directories is used to name folders on the host and, with an index
        # appended, folders in the container as well (e.g. /data1/node-1/dfs:/dfs1).
        service_directories = ['dfs', 'yarn']

        # Every Node will have shared_volumes to let one set of configs on the host be propagated
        # to every container. If --data-directories is specified, this will be appended to allow
        # containers to use multiple disks on the host.
        volumes = shared_volumes[:]
        if args.data_directories:
            data_directories = args.data_directories.split(',')
            volumes += [{join(data_directory, uuid, hostname, service_directory):
                             "/{0}{1}".format(service_directory, i)}
                        for i, data_directory in enumerate(data_directories, start=1)
                        for service_directory in service_directories]
        secondary_nodes.append(Node(hostname=hostname,
                                    network=args.network,
                                    image=image,
                                    volumes=volumes))

    Cluster(topology='apache_hbase',
            node_groups=[NodeGroup(name='primary', nodes=[primary_node]),
                         NodeGroup(name='secondary', nodes=secondary_nodes)],
            network_name=args.network).start()

    # When creating configs, pass a dictionary of wildcards into _create_configs_from_file
    # to transform placeholders in the configurations.cfg file into real values.
    _create_configs_from_file(filename=args.configurations,
                              cluster_config_dir=container_cluster_config_dir,
                              wildcards={"primary_node": args.primary_node,
                                         "secondary_nodes": args.secondary_nodes,
                                         "all_nodes": args.primary_node + args.secondary_nodes,
                                         "network": args.network})

    # After creating configurations from the configurations.cfg file, update hdfs-site.xml and
    # yarn-site.xml to use the data directories passed on the command line.
    if args.data_directories:
        _update_config_for_data_dirs(
            container_cluster_config_dir=container_cluster_config_dir,
            data_directories=data_directories
        )

    if not args.dont_start_services:
        _start_services(primary_node, hbase_master_web_ui_port=hbase_master_web_ui_port)
Example #8
def start(args):
    primary_node_image = "{0}/{1}/clusterdock:{2}_{3}_primary-node".format(
        args.registry_url, args.namespace or DEFAULT_CLOUDERA_NAMESPACE,
        args.cdh_string, args.cm_string
    )

    secondary_node_image = "{0}/{1}/clusterdock:{2}_{3}_secondary-node".format(
        args.registry_url, args.namespace or DEFAULT_CLOUDERA_NAMESPACE,
        args.cdh_string, args.cm_string
    )

    for image in [primary_node_image, secondary_node_image]:
        if args.always_pull or not is_image_available_locally(image):
            logger.info("Pulling image %s. This might take a little while...", image)
            pull_image(image)

    CM_SERVER_PORT = 7180

    primary_node = Node(hostname=args.primary_node[0], network=args.network,
                        image=primary_node_image, ports=[CM_SERVER_PORT],
                        volumes=[])

    secondary_nodes = [Node(hostname=hostname, network=args.network, image=secondary_node_image,
                            volumes=[])
                       for hostname in args.secondary_nodes]

    secondary_node_group = NodeGroup(name='secondary', nodes=secondary_nodes)
    node_groups = [NodeGroup(name='primary', nodes=[primary_node]),
                   secondary_node_group]

    cluster = Cluster(node_groups=node_groups, network_name=args.network)
    cluster.start()

    '''
    A hack is needed here. In short, Docker mounts a number of files from the host into
    the container (and so do we). As such, when CM runs 'mount' inside of the containers
    during setup, it sees these ext4 files as suitable places in which to install things.
    Unfortunately, CM doesn't have a blacklist to ignore filesystem types and only including
    our containers' filesystem in the agents' config.ini whitelist is insufficient, since CM
    merges that list with the contents of /proc/filesystems. To work around this, we copy
    the culprit files inside of the container, which creates those files in aufs. We then
    unmount the volumes within the container and then move the files back to their original
    locations. By doing this, we preserve the contents of the files (which is necessary for
    things like networking to work properly) and keep CM happy.
    '''
    filesystem_fix_commands = []
    for file in ['/etc/hosts', '/etc/resolv.conf', '/etc/hostname', '/etc/localtime']:
        filesystem_fix_commands.append("cp {0} {0}.1; umount {0}; mv {0}.1 {0};".format(file))
    filesystem_fix_command = ' '.join(filesystem_fix_commands)
    cluster.ssh(filesystem_fix_command)

    change_cm_server_host(cluster, primary_node.fqdn)
    if len(secondary_nodes) > 1:
        additional_nodes = secondary_nodes[1:]
        remove_files(cluster, files=['/var/lib/cloudera-scm-agent/uuid',
                                     '/dfs*/dn/current/*'],
                     nodes=additional_nodes)

    # Restarting the CM agents occasionally fails for reasons we haven't yet pinned down. Keep
    # going if that happens while we work on reproducing the problem.
    try:
        restart_cm_agents(cluster)
    except Exception:
        pass

    logger.info('Waiting for Cloudera Manager server to come online...')
    cm_server_startup_time = wait_for_port_open(primary_node.ip_address,
                                                CM_SERVER_PORT, timeout_sec=180)
    logger.info("Detected Cloudera Manager server after %.2f seconds.", cm_server_startup_time)
    cm_server_web_ui_host_port = get_host_port_binding(primary_node.container_id,
                                                       CM_SERVER_PORT)

    logger.info("CM server is now accessible at http://%s:%s",
                getfqdn(), cm_server_web_ui_host_port)

    deployment = ClouderaManagerDeployment(cm_server_address=primary_node.ip_address)
    deployment.setup_api_resources()

    if len(cluster) > 2:
        deployment.add_hosts_to_cluster(secondary_node_fqdn=secondary_nodes[0].fqdn,
                                        all_fqdns=[node.fqdn for node in cluster])

    deployment.update_database_configs()
    deployment.update_hive_metastore_namenodes()

    if args.include_service_types:
        # CM maintains service types in CAPS, so make sure our args.include_service_types list
        # follows the same convention.
        service_types_to_leave = args.include_service_types.upper().split(',')
        for service in deployment.cluster.get_all_services():
            if service.type not in service_types_to_leave:
                logger.info('Removing service %s from %s...', service.name, deployment.cluster.displayName)
                deployment.cluster.delete_service(service.name)
    elif args.exclude_service_types:
        service_types_to_remove = args.exclude_service_types.upper().split(',')
        for service in deployment.cluster.get_all_services():
            if service.type in service_types_to_remove:
                logger.info('Removing service %s from %s...', service.name, deployment.cluster.displayName)
                deployment.cluster.delete_service(service.name)

    logger.info("Deploying client configuration...")
    deployment.cluster.deploy_client_config().wait()

    if not args.dont_start_cluster:
        logger.info('Starting cluster...')
        if not deployment.cluster.start().wait().success:
            raise Exception('Failed to start cluster.')
        logger.info('Starting Cloudera Management service...')
        if not deployment.cm.get_service().start().wait().success:
            raise Exception('Failed to start Cloudera Management service.')

        deployment.validate_services_started()

    logger.info("We'd love to know what you think of our CDH topology for clusterdock! Please "
                "direct any feedback to our community forum at "
                "http://tiny.cloudera.com/hadoop-101-forum.")