Example #1
def start(args):
    image = "{0}/{1}/clusterdock:{2}_nodebase".format(args.registry_url,
                                                      args.namespace or DEFAULT_CLOUDERA_NAMESPACE,
                                                      args.operating_system)
    if args.always_pull or not is_image_available_locally(image):
        pull_image(image)

    node_groups = [NodeGroup(name='nodes', nodes=[Node(hostname=hostname, network=args.network,
                                                       image=image)
                                                  for hostname in args.nodes])]
    cluster = Cluster(topology='nodebase', node_groups=node_groups, network_name=args.network)
    cluster.start()
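These examples assume an argparse-style namespace. As a rough illustration of the attributes this first start() reads (the attribute names come straight from the code above; the flags and defaults below are assumptions, not clusterdock's actual CLI):

# Hypothetical parser for the arguments start() reads; flags and defaults are illustrative only.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--registry-url', default='docker.io')
parser.add_argument('--namespace', default=None)  # falls back to DEFAULT_CLOUDERA_NAMESPACE
parser.add_argument('--operating-system', default='centos6.6')
parser.add_argument('--always-pull', action='store_true')
parser.add_argument('--network', default='cluster')
parser.add_argument('--nodes', nargs='+', default=['node-1', 'node-2'])

args = parser.parse_args()
start(args)  # assumes the clusterdock imports used above are in scope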
Example #2
def start(args):
    image = "{0}/{1}/clusterdock:{2}_nodebase".format(
        args.registry_url, args.namespace or DEFAULT_CLOUDERA_NAMESPACE,
        args.operating_system)
    if args.always_pull or not is_image_available_locally(image):
        pull_image(image)

    node_groups = [
        NodeGroup(name='nodes',
                  nodes=[
                      Node(hostname=hostname,
                           network=args.network,
                           image=image,
                           volumes=[]) for hostname in args.nodes
                  ])
    ]
    cluster = Cluster(node_groups=node_groups, network_name=args.network)
    cluster.start()
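Compared with Example #1, this variant passes an explicit empty volumes list to each Node and constructs the Cluster without the topology keyword; the pull-and-start flow is otherwise identical.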
Example #3
def start(args):
    primary_node_image = "{0}/{1}/clusterdock:{2}_{3}_primary-node".format(
        args.registry_url, args.namespace or DEFAULT_CLOUDERA_NAMESPACE,
        args.cdh_string, args.cm_string
    )

    secondary_node_image = "{0}/{1}/clusterdock:{2}_{3}_secondary-node".format(
        args.registry_url, args.namespace or DEFAULT_CLOUDERA_NAMESPACE,
        args.cdh_string, args.cm_string
    )

    for image in [primary_node_image, secondary_node_image]:
        if args.always_pull or not is_image_available_locally(image):
            logger.info("Pulling image %s. This might take a little while...", image)
            pull_image(image)

    CM_SERVER_PORT = 7180
    HUE_SERVER_PORT = 8888

    primary_node = Node(hostname=args.primary_node[0], network=args.network,
                        image=primary_node_image, ports=[CM_SERVER_PORT, HUE_SERVER_PORT])

    secondary_nodes = [Node(hostname=hostname, network=args.network, image=secondary_node_image)
                       for hostname in args.secondary_nodes]

    secondary_node_group = NodeGroup(name='secondary', nodes=secondary_nodes)
    node_groups = [NodeGroup(name='primary', nodes=[primary_node]),
                   secondary_node_group]

    cluster = Cluster(topology='cdh', node_groups=node_groups, network_name=args.network)
    cluster.start()

    '''
    A hack is needed here. In short, Docker mounts a number of files from the host into
    the container (and so do we). As such, when CM runs 'mount' inside of the containers
    during setup, it sees these ext4 files as suitable places in which to install things.
    Unfortunately, CM doesn't have a blacklist to ignore filesystem types and only including
    our containers' filesystem in the agents' config.ini whitelist is insufficient, since CM
    merges that list with the contents of /proc/filesystems. To work around this, we copy
    the culprit files inside of the container, which creates those files in aufs. We then
    unmount the volumes within the container and then move the files back to their original
    locations. By doing this, we preserve the contents of the files (which is necessary for
    things like networking to work properly) and keep CM happy.
    '''
    filesystem_fix_commands = []
    for file in ['/etc/hosts', '/etc/resolv.conf', '/etc/hostname', '/etc/localtime']:
        filesystem_fix_commands.append("cp {0} {0}.1; umount {0}; mv {0}.1 {0};".format(file))
    filesystem_fix_command = ' '.join(filesystem_fix_commands)
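    # For a single file such as /etc/hosts, each iteration above produces the snippet
    # "cp /etc/hosts /etc/hosts.1; umount /etc/hosts; mv /etc/hosts.1 /etc/hosts;", and the
    # joined command runs all four snippets over SSH across the cluster's nodes.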
    cluster.ssh(filesystem_fix_command)

    change_cm_server_host(cluster, primary_node.fqdn)
    if len(secondary_nodes) > 1:
        additional_nodes = secondary_nodes[1:]
        remove_files(cluster, files=['/var/lib/cloudera-scm-agent/uuid',
                                     '/dfs*/dn/current/*'],
                     nodes=additional_nodes)

    # It looks like there may be something buggy when it comes to restarting the CM agent. Keep
    # going if this happens while we work on reproducing the problem.
    try:
        restart_cm_agents(cluster)
    except Exception:
        logger.warning('Restarting the CM agents failed; continuing anyway.')

    logger.info('Waiting for Cloudera Manager server to come online...')
    cm_server_startup_time = wait_for_port_open(primary_node.ip_address,
                                                CM_SERVER_PORT, timeout_sec=180)
    logger.info("Detected Cloudera Manager server after %.2f seconds.", cm_server_startup_time)
    cm_server_web_ui_host_port = get_host_port_binding(primary_node.container_id,
                                                       CM_SERVER_PORT)

    logger.info("CM server is now accessible at http://%s:%s",
                getfqdn(), cm_server_web_ui_host_port)

    deployment = ClouderaManagerDeployment(cm_server_address=primary_node.ip_address)
    deployment.setup_api_resources()

    if len(cluster) > 2:
        deployment.add_hosts_to_cluster(secondary_node_fqdn=secondary_nodes[0].fqdn,
                                        all_fqdns=[node.fqdn for node in cluster])

    deployment.update_database_configs()
    deployment.update_hive_metastore_namenodes()

    if args.include_service_types:
        # CM maintains service types in CAPS, so make sure our args.include_service_types list
        # follows the same convention.
        service_types_to_leave = args.include_service_types.upper().split(',')
        for service in deployment.cluster.get_all_services():
            if service.type not in service_types_to_leave:
                logger.info('Removing service %s from %s...', service.name, deployment.cluster.displayName)
                deployment.cluster.delete_service(service.name)
    elif args.exclude_service_types:
        service_types_to_remove = args.exclude_service_types.upper().split(',')
        for service in deployment.cluster.get_all_services():
            if service.type in service_types_to_remove:
                logger.info('Removing service %s from %s...', service.name, deployment.cluster.displayName)
                deployment.cluster.delete_service(service.name)

    hue_server_host_port = get_host_port_binding(primary_node.container_id, HUE_SERVER_PORT)
    for service in deployment.cluster.get_all_services():
        if service.type == 'HUE':
            logger.info("Once its service starts, Hue server will be accessible at http://%s:%s",
                        getfqdn(), hue_server_host_port)
            break

    logger.info("Deploying client configuration...")
    deployment.cluster.deploy_client_config().wait()

    if not args.dont_start_cluster:
        logger.info('Starting cluster...')
        if not deployment.cluster.start().wait().success:
            raise Exception('Failed to start cluster.')
        logger.info('Starting Cloudera Management service...')
        if not deployment.cm.get_service().start().wait().success:
            raise Exception('Failed to start Cloudera Management service.')

        deployment.validate_services_started()

    logger.info("We'd love to know what you think of our CDH topology for clusterdock! Please "
                "direct any feedback to our community forum at "
                "http://tiny.cloudera.com/hadoop-101-forum.")
Example #4
def start(args):
    """This function will be executed when ./bin/start_cluster apache_hbase is invoked."""

    # pylint: disable=too-many-locals
    # Pylint doesn't want more than 15 local variables in a function; this one has 17. This is about
    # as low as I want to go because, while I can cheat and stuff unrelated things in a dictionary,
    # that won't improve readability.

    uuid = str(uuid4())
    container_cluster_config_dir = join(CLUSTERDOCK_VOLUME, uuid, 'config')
    makedirs(container_cluster_config_dir)

    for mount in client.inspect_container(
            get_clusterdock_container_id())['Mounts']:
        if mount['Destination'] == CLUSTERDOCK_VOLUME:
            host_cluster_config_dir = join(mount['Source'], uuid, 'config')
            break
    else:
        raise Exception(
            "Could not find source of {0} mount.".format(CLUSTERDOCK_VOLUME))

    # CLUSTERDOCK_VOLUME/uuid/config in the clusterdock container corresponds to
    # host_cluster_config_dir on the Docker host.
    logger.debug("Creating directory for cluster configuration files in %s...",
                 host_cluster_config_dir)

    # Generate the image name to use from the command line arguments passed in.
    image = '/'.join([
        item for item in [
            args.registry_url, args.namespace or DEFAULT_APACHE_NAMESPACE,
            "clusterdock:{os}_java-{java}_hadoop-{hadoop}_hbase-{hbase}".
            format(os=args.operating_system,
                   java=args.java_version,
                   hadoop=args.hadoop_version,
                   hbase=args.hbase_version)
        ] if item
    ])
    if args.always_pull or not is_image_available_locally(image):
        pull_image(image)

    # Before starting the cluster, we create a throwaway container from which we copy
    # configuration files back to the host. We also use this container to run an HBase
    # command that returns the port of the HBase master web UI. Since we aren't running init here,
    # we also have to manually pass in JAVA_HOME as an environment variable.
    get_hbase_web_ui_port_command = (
        '/hbase/bin/hbase org.apache.hadoop.hbase.util.HBaseConfTool '
        'hbase.master.info.port')
    container_id = client.create_container(
        image=image,
        command=get_hbase_web_ui_port_command,
        environment={'JAVA_HOME': '/java'})['Id']
    logger.debug(
        "Created temporary container (id: %s) from which to copy configuration files.",
        container_id)

    # Actually do the copying of Hadoop configs...
    _copy_container_folder_to_host(
        container_id, '/hadoop/etc/hadoop',
        join(container_cluster_config_dir, 'hadoop'),
        join(host_cluster_config_dir, 'hadoop'))

    # ... and repeat for HBase configs.
    _copy_container_folder_to_host(container_id, '/hbase/conf',
                                   join(container_cluster_config_dir, 'hbase'),
                                   join(host_cluster_config_dir, 'hbase'))

    logger.info(
        "The /hbase/lib folder on containers in the cluster will be volume mounted "
        "into %s...", join(host_cluster_config_dir, 'hbase-lib'))
    _copy_container_folder_to_host(
        container_id, '/hbase/lib',
        join(container_cluster_config_dir, 'hbase-lib'),
        join(host_cluster_config_dir, 'hbase-lib'))

    # Every node in the cluster will have a shared volume mount from the host for Hadoop and HBase
    # configuration files as well as the HBase lib folder.
    shared_volumes = [
        {join(host_cluster_config_dir, 'hadoop'): '/hadoop/etc/hadoop'},
        {join(host_cluster_config_dir, 'hbase'): '/hbase/conf'},
        {join(host_cluster_config_dir, 'hbase-lib'): '/hbase/lib'}
    ]

    # Get the HBase master web UI port, stripping the newline the Docker REST API gives us.
    client.start(container=container_id)
    if client.wait(container=container_id) == EX_OK:
        hbase_master_web_ui_port = client.logs(container=container_id).rstrip()
        client.remove_container(container=container_id, force=True)
    else:
        raise Exception('HBase configuration container exited with a non-zero status.')

    # Create the Node objects. These hold the state of our container nodes and will be started
    # at Cluster instantiation time.
    primary_node = Node(hostname=args.primary_node[0],
                        network=args.network,
                        image=image,
                        ports=[
                            NAMENODE_WEB_UI_PORT, hbase_master_web_ui_port,
                            RESOURCEMANAGER_WEB_UI_PORT, HBASE_REST_SERVER_PORT
                        ],
                        volumes=shared_volumes)
    secondary_nodes = []
    for hostname in args.secondary_nodes:
        # Service directory names are used to name folders on the host and, with an index
        # appended, mount points in the container (e.g. /data1/<uuid>/node-1/dfs:/dfs1).
        service_directories = ['dfs', 'yarn']

        # Every Node will have shared_volumes to let one set of configs on the host be propagated
        # to every container. If --data-directories is specified, this will be appended to allow
        # containers to use multiple disks on the host.
        volumes = shared_volumes[:]
        if args.data_directories:
            data_directories = args.data_directories.split(',')
            volumes += [{
                join(data_directory, uuid, hostname, service_directory):
                "/{0}{1}".format(service_directory, i)
            } for i, data_directory in enumerate(data_directories, start=1)
                        for service_directory in service_directories]
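            # With --data-directories=/data1,/data2 and hostname 'node-2', for example,
            # this adds host-to-container mappings like
            #   /data1/<uuid>/node-2/dfs -> /dfs1, /data1/<uuid>/node-2/yarn -> /yarn1,
            #   /data2/<uuid>/node-2/dfs -> /dfs2, /data2/<uuid>/node-2/yarn -> /yarn2.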
        secondary_nodes.append(
            Node(hostname=hostname,
                 network=args.network,
                 image=image,
                 volumes=volumes))

    Cluster(topology='apache_hbase',
            node_groups=[
                NodeGroup(name='primary', nodes=[primary_node]),
                NodeGroup(name='secondary', nodes=secondary_nodes)
            ],
            network_name=args.network).start()

    # When creating configs, pass in a dictionary of wildcards into create_configurations_from_file
    # to transform placeholders in the configurations.cfg file into real values.
    _create_configs_from_file(filename=args.configurations,
                              cluster_config_dir=container_cluster_config_dir,
                              wildcards={
                                  "primary_node": args.primary_node,
                                  "secondary_nodes": args.secondary_nodes,
                                  "all_nodes":
                                  args.primary_node + args.secondary_nodes,
                                  "network": args.network
                              })

    # After creating configurations from the configurations.cfg file, update hdfs-site.xml and
    # yarn-site.xml to use the data directories passed on the command line.
    if args.data_directories:
        _update_config_for_data_dirs(
            container_cluster_config_dir=container_cluster_config_dir,
            data_directories=data_directories)

    if not args.dont_start_services:
        _start_services(primary_node,
                        hbase_master_web_ui_port=hbase_master_web_ui_port)
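_copy_container_folder_to_host is a private helper of this topology and its body isn't part of the example. A rough sketch of one way it could work with the same low-level docker-py client, assuming get_archive is available and that host_folder is informational only (this is an illustration, not the actual helper):

import os
import tarfile
from io import BytesIO

def _copy_container_folder_to_host(container_id, container_folder,
                                   destination_folder, host_folder):
    # Export the folder from the throwaway container as a tar stream; get_archive is
    # the API equivalent of `docker cp`.
    stream, _ = client.get_archive(container=container_id, path=container_folder)
    archive = BytesIO(b''.join(stream))

    # Unpack into destination_folder, which sits on the shared clusterdock volume and
    # therefore appears as host_folder on the Docker host.
    if not os.path.isdir(destination_folder):
        os.makedirs(destination_folder)
    with tarfile.open(fileobj=archive) as tar:
        for member in tar.getmembers():
            # Members are prefixed with the source folder's own name; strip it so the
            # contents land directly in destination_folder.
            member.name = '/'.join(member.name.split('/')[1:]) or '.'
            tar.extract(member, path=destination_folder)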
Example #5
def start(args):
    primary_node_image = "{0}/{1}/clusterdock:{2}_{3}_primary-node".format(
        args.registry_url, args.namespace or DEFAULT_CLOUDERA_NAMESPACE,
        args.cdh_string, args.cm_string
    )

    secondary_node_image = "{0}/{1}/clusterdock:{2}_{3}_secondary-node".format(
        args.registry_url, args.namespace or DEFAULT_CLOUDERA_NAMESPACE,
        args.cdh_string, args.cm_string
    )

    for image in [primary_node_image, secondary_node_image]:
        if args.always_pull or not is_image_available_locally(image):
            logger.info("Pulling image %s. This might take a little while...", image)
            pull_image(image)

    CM_SERVER_PORT = 7180

    primary_node = Node(hostname=args.primary_node[0], network=args.network,
                        image=primary_node_image, ports=[CM_SERVER_PORT],
                        volumes=[])

    secondary_nodes = [Node(hostname=hostname, network=args.network, image=secondary_node_image,
                            volumes=[])
                       for hostname in args.secondary_nodes]

    secondary_node_group = NodeGroup(name='secondary', nodes=secondary_nodes)
    node_groups = [NodeGroup(name='primary', nodes=[primary_node]),
                   secondary_node_group]

    cluster = Cluster(node_groups=node_groups, network_name=args.network)
    cluster.start()

    '''
    A hack is needed here. In short, Docker mounts a number of files from the host into
    the container (and so do we). As such, when CM runs 'mount' inside of the containers
    during setup, it sees these ext4 files as suitable places in which to install things.
    Unfortunately, CM doesn't have a blacklist to ignore filesystem types and only including
    our containers' filesystem in the agents' config.ini whitelist is insufficient, since CM
    merges that list with the contents of /proc/filesystems. To work around this, we copy
    the culprit files inside of the container, which creates those files in aufs. We then
    unmount the volumes within the container and then move the files back to their original
    locations. By doing this, we preserve the contents of the files (which is necessary for
    things like networking to work properly) and keep CM happy.
    '''
    filesystem_fix_commands = []
    for file in ['/etc/hosts', '/etc/resolv.conf', '/etc/hostname', '/etc/localtime']:
        filesystem_fix_commands.append("cp {0} {0}.1; umount {0}; mv {0}.1 {0};".format(file))
    filesystem_fix_command = ' '.join(filesystem_fix_commands)
    cluster.ssh(filesystem_fix_command)

    change_cm_server_host(cluster, primary_node.fqdn)
    if len(secondary_nodes) > 1:
        additional_nodes = secondary_nodes[1:]
        remove_files(cluster, files=['/var/lib/cloudera-scm-agent/uuid',
                                     '/dfs*/dn/current/*'],
                     nodes=additional_nodes)

    # It looks like there may be something buggy when it comes to restarting the CM agent. Keep
    # going if this happens while we work on reproducing the problem.
    try:
        restart_cm_agents(cluster)
    except Exception:
        logger.warning('Restarting the CM agents failed; continuing anyway.')

    logger.info('Waiting for Cloudera Manager server to come online...')
    cm_server_startup_time = wait_for_port_open(primary_node.ip_address,
                                                CM_SERVER_PORT, timeout_sec=180)
    logger.info("Detected Cloudera Manager server after %.2f seconds.", cm_server_startup_time)
    cm_server_web_ui_host_port = get_host_port_binding(primary_node.container_id,
                                                       CM_SERVER_PORT)

    logger.info("CM server is now accessible at http://%s:%s",
                getfqdn(), cm_server_web_ui_host_port)

    deployment = ClouderaManagerDeployment(cm_server_address=primary_node.ip_address)
    deployment.setup_api_resources()

    if len(cluster) > 2:
        deployment.add_hosts_to_cluster(secondary_node_fqdn=secondary_nodes[0].fqdn,
                                        all_fqdns=[node.fqdn for node in cluster])

    deployment.update_database_configs()
    deployment.update_hive_metastore_namenodes()

    if args.include_service_types:
        # CM maintains service types in CAPS, so make sure our args.include_service_types list
        # follows the same convention.
        service_types_to_leave = args.include_service_types.upper().split(',')
        for service in deployment.cluster.get_all_services():
            if service.type not in service_types_to_leave:
                logger.info('Removing service %s from %s...', service.name, deployment.cluster.displayName)
                deployment.cluster.delete_service(service.name)
    elif args.exclude_service_types:
        service_types_to_remove = args.exclude_service_types.upper().split(',')
        for service in deployment.cluster.get_all_services():
            if service.type in service_types_to_remove:
                logger.info('Removing service %s from %s...', service.name, deployment.cluster.displayName)
                deployment.cluster.delete_service(service.name)

    logger.info("Deploying client configuration...")
    deployment.cluster.deploy_client_config().wait()

    if not args.dont_start_cluster:
        logger.info('Starting cluster...')
        if not deployment.cluster.start().wait().success:
            raise Exception('Failed to start cluster.')
        logger.info('Starting Cloudera Management service...')
        if not deployment.cm.get_service().start().wait().success:
            raise Exception('Failed to start Cloudera Management service.')

        deployment.validate_services_started()

    logger.info("We'd love to know what you think of our CDH topology for clusterdock! Please "
                "direct any feedback to our community forum at "
                "http://tiny.cloudera.com/hadoop-101-forum.")