Example #1
0
def scale_cluster(cluster, instances):
    """Bring newly added instances into an existing cluster.

    The instances are registered with the manager, software is installed
    on them, and they are attached to the MapReduce and/or HDFS services
    according to whether their FQDN appears among the cluster's
    tasktrackers and datanodes. Afterwards ``nodes.config()`` is called
    and the affected services are started.

    :param cluster: cluster being scaled; used to look up the manager,
        datanodes and tasktrackers.
    :param instances: iterable of instances to add; only ``fqdn()`` is
        called on each of them.
    """
    new_hosts = [inst.fqdn() for inst in instances]
    datanode_fqdns = [node.fqdn() for node in u.get_datanodes(cluster)]
    tasktracker_fqdns = [node.fqdn() for node in u.get_tasktrackers(cluster)]

    # Select the new hosts per role, keeping the order of new_hosts.
    new_datanodes = [host for host in new_hosts if host in datanode_fqdns]
    new_tasktrackers = [host for host in new_hosts
                        if host in tasktracker_fqdns]

    manager = u.get_instance(cluster, 'manager')
    idh = c.IntelClient(manager, cluster.name)

    idh.nodes.add(new_hosts, '/Default', 'hadoop',
                  '/home/hadoop/.ssh/id_rsa')
    idh.cluster.install_software(new_hosts)

    # Attach nodes to their services before the configuration step.
    if new_tasktrackers:
        idh.services.mapred.add_nodes('TaskTracker', new_tasktrackers)
    if new_datanodes:
        idh.services.hdfs.add_nodes('DataNode', new_datanodes)

    idh.nodes.config()

    # Start only the services that actually received new nodes.
    if new_datanodes:
        idh.services.hdfs.start()
    if new_tasktrackers:
        idh.services.mapred.start()
Example #2
0
def install_cluster(cluster):
    """Create the cluster in the manager and provision all of its nodes.

    Steps, in order: create the cluster, add every host, install
    software, configure services, call ``nodes.config(force=True)``,
    apply storage/swift/user configuration and finally format HDFS.

    :param cluster: cluster to install; its instances, manager instance
        and name are read.
    """
    manager = u.get_instance(cluster, 'manager')

    # De-duplicate the host names through a set before handing them over.
    unique_hosts = list(set(inst.fqdn() for inst in u.get_instances(cluster)))

    idh = c.IntelClient(manager, cluster.name)

    LOG.info("Create cluster")
    idh.cluster.create()

    LOG.info("Add nodes to cluster")
    idh.nodes.add(unique_hosts, '/Default', 'hadoop',
                  '/home/hadoop/.ssh/id_rsa')

    LOG.info("Install software")
    idh.cluster.install_software(unique_hosts)

    LOG.info("Configure services")
    _configure_services(idh, cluster)

    LOG.info("Deploy cluster")
    idh.nodes.config(force=True)

    LOG.info("Provisioning configs")
    # Applied in this order: storage (cinder and ephemeral drives),
    # swift support, then user-supplied parameters.
    _configure_storage(idh, cluster)
    _configure_swift(idh, cluster)
    _add_user_params(idh, cluster)

    LOG.info("Format HDFS")
    idh.services.hdfs.format()
Example #3
0
def scale_cluster(cluster, instances):
    """Bring newly added instances into an existing cluster.

    The instances are registered with the manager (reached through its
    management IP), software is installed on them, and they are attached
    to the MapReduce and/or HDFS services according to whether their FQDN
    appears among the cluster's tasktrackers and datanodes. Afterwards
    ``nodes.config()`` is called and the affected services are started.

    :param cluster: cluster being scaled; ``cluster.extra`` must contain
        the ``'manager_authzkeyfile_path'`` key.
    :param instances: iterable of instances to add; only ``fqdn()`` is
        called on each of them.
    """
    new_hosts = [inst.fqdn() for inst in instances]
    datanode_fqdns = [node.fqdn() for node in u.get_datanodes(cluster)]
    tasktracker_fqdns = [node.fqdn() for node in u.get_tasktrackers(cluster)]

    # Select the new hosts per role, keeping the order of new_hosts.
    new_datanodes = [host for host in new_hosts if host in datanode_fqdns]
    new_tasktrackers = [host for host in new_hosts
                        if host in tasktracker_fqdns]

    manager_ip = u.get_instance(cluster, 'manager').management_ip
    idh = c.IntelClient(manager_ip, cluster.name)

    idh.nodes.add(new_hosts, '/Default', 'hadoop',
                  cluster.extra['manager_authzkeyfile_path'])
    idh.cluster.install_software(new_hosts)

    # Attach nodes to their services before the configuration step.
    if new_tasktrackers:
        idh.services.mapred.add_nodes('TaskTracker', new_tasktrackers)
    if new_datanodes:
        idh.services.hdfs.add_nodes('DataNode', new_datanodes)

    idh.nodes.config()

    # Start only the services that actually received new nodes.
    if new_datanodes:
        idh.services.hdfs.start()
    if new_tasktrackers:
        idh.services.mapred.start()
Example #4
0
def install_cluster(cluster):
    """Create the cluster in the manager and provision all of its nodes.

    The manager is addressed by its management IP. Steps, in order:
    create the cluster, add every host, install software, configure
    services, call ``nodes.config(force=True)``, apply storage/swift/user
    configuration and finally format HDFS.

    :param cluster: cluster to install; its instances, manager instance
        and name are read.
    """
    manager_ip = u.get_instance(cluster, 'manager').management_ip

    # De-duplicate the host names through a set before handing them over.
    unique_hosts = list(set(inst.fqdn() for inst in u.get_instances(cluster)))

    idh = c.IntelClient(manager_ip, cluster.name)

    LOG.info("Create cluster")
    idh.cluster.create()

    LOG.info("Add nodes to cluster")
    idh.nodes.add(unique_hosts, '/Default', 'hadoop',
                  '/home/hadoop/.ssh/id_rsa')

    LOG.info("Install software")
    idh.cluster.install_software(unique_hosts)

    LOG.info("Configure services")
    _configure_services(idh, cluster)

    LOG.info("Deploy cluster")
    idh.nodes.config(force=True)

    LOG.info("Provisioning configs")
    # Applied in this order: storage (cinder and ephemeral drives),
    # swift support, then user-supplied parameters.
    _configure_storage(idh, cluster)
    _configure_swift(idh, cluster)
    _add_user_params(idh, cluster)

    LOG.info("Format HDFS")
    idh.services.hdfs.format()
Example #5
0
def start_cluster(cluster):
    """Start the cluster's services through the manager client.

    HDFS and MapReduce are always started. Hive is started when
    ``u.get_hiveserver(cluster)`` is truthy; Oozie is set up and started
    when ``u.get_oozie(cluster)`` is truthy.

    :param cluster: cluster whose services should be started.
    """
    manager_ip = u.get_instance(cluster, 'manager').management_ip
    idh = c.IntelClient(manager_ip, cluster.name)

    LOG.debug("Starting hadoop services")
    idh.services.hdfs.start()
    idh.services.mapred.start()

    if u.get_hiveserver(cluster):
        idh.services.hive.start()

    # Oozie is the last optional service; nothing left to do without it.
    if not u.get_oozie(cluster):
        return

    LOG.info("Setup oozie")
    _setup_oozie(cluster)
    idh.services.oozie.start()
Example #6
0
def start_cluster(cluster):
    """Start the cluster's services through the manager client.

    HDFS is always started. MapReduce, Hive and Oozie are started only
    when the corresponding ``u.get_jobtracker`` / ``u.get_hiveserver`` /
    ``u.get_oozie`` lookup is truthy; Oozie is set up before it is
    started.

    :param cluster: cluster whose services should be started.
    """
    manager = u.get_instance(cluster, 'manager')
    idh = c.IntelClient(manager, cluster.name)

    LOG.debug("Starting hadoop services")
    idh.services.hdfs.start()

    if u.get_jobtracker(cluster):
        idh.services.mapred.start()

    if u.get_hiveserver(cluster):
        idh.services.hive.start()

    # Oozie is the last optional service; nothing left to do without it.
    if not u.get_oozie(cluster):
        return

    LOG.info("Setup oozie")
    _setup_oozie(cluster)
    idh.services.oozie.start()
Example #7
0
def install_manager(cluster):
    """Install the IDH manager on the cluster's 'manager' instance.

    Downloads the IDH tarball named in the 'general' cluster configs,
    unpacks it, writes the installer configuration, runs the silent
    installer and then polls port 9443 of the manager until it accepts
    a connection.

    :param cluster: cluster object; its ``cluster_configs`` and ``name``
        are read and its 'manager' instance runs the remote commands.
    :raises RuntimeError: if downloading or unpacking the tarball fails.
    :raises iex.IntelPluginException: if the manager does not accept a
        connection on port 9443 before the timeout expires.
    """
    LOG.info("Starting Install Manager Process")
    mng_instance = u.get_instance(cluster, 'manager')
    general_configs = cluster.cluster_configs.get('general')

    idh_tarball_path = c_helper.get_config_value(
        general_configs, c_helper.IDH_TARBALL_URL)

    idh_tarball_filename = idh_tarball_path.rsplit('/', 1)[-1]
    # partition() leaves the name intact when the '.tar.gz' suffix is
    # missing; slicing with find() would silently drop the last character
    # because find() returns -1 in that case.
    idh_dir = idh_tarball_filename.partition('.tar.gz')[0]
    LOG.info("IDH tgz will be retrieved from: '%s'", idh_tarball_path)

    idh_repo = c_helper.get_config_value(
        general_configs, c_helper.IDH_REPO_URL)

    os_repo = c_helper.get_config_value(
        general_configs, c_helper.OS_REPO_URL)

    idh_install_cmd = 'sudo ./%s/install.sh --mode=silent 2>&1' % idh_dir

    with mng_instance.remote() as r:
        LOG.info("Download IDH manager ")
        try:
            r.execute_command('curl -O %s 2>&1' % idh_tarball_path)
        except Exception as e:
            # Exceptions do not interpolate their arguments the way the
            # logging calls do, so build the message explicitly.
            raise RuntimeError(
                "Unable to download IDH manager from %s: %s"
                % (idh_tarball_path, e))

        # unpack archive
        LOG.info("Unpack manager %s ", idh_tarball_filename)
        try:
            r.execute_command('tar xzf %s 2>&1' % idh_tarball_filename)
        except Exception as e:
            raise RuntimeError(
                "Unable to unpack tgz %s: %s" % (idh_tarball_filename, e))

        # install idh
        LOG.debug("Install manager with %s : ", idh_install_cmd)
        inst_conf = _INST_CONF_TEMPLATE % (os_repo, idh_repo)
        r.write_file_to('%s/ui-installer/conf' % idh_dir, inst_conf)
        #TODO(alazarev) make timeout configurable (bug #1262897)
        r.execute_command(idh_install_cmd, timeout=3600)

        # fix nginx permissions bug
        r.execute_command('sudo chmod o+x /var/lib/nginx/ /var/lib/nginx/tmp '
                          '/var/lib/nginx/tmp/client_body')

    # wait for the idh manager to start
    #TODO(alazarev) make timeout configurable (bug #1262897)
    timeout = 600
    poll_interval = 2
    LOG.debug("Waiting %s seconds for Manager to start : ", timeout)
    # Count down on a separate variable so the configured timeout is
    # still available for the failure message.
    remaining = timeout
    while remaining > 0:
        try:
            # Only reachability matters, so close the probe connection
            # right away instead of leaking the socket.
            telnetlib.Telnet(mng_instance.management_ip, 9443).close()
            break
        except IOError:
            remaining -= poll_interval
            context.sleep(poll_interval)
    else:
        # Reached only when the countdown expired without a connection.
        message = ("IDH Manager failed to start in %s minutes on node '%s' "
                   "of cluster '%s'" %
                   (timeout // 60, mng_instance.management_ip, cluster.name))
        LOG.error(message)
        raise iex.IntelPluginException(message)
Example #8
0
def decommission_nodes(cluster, instances):
    """Remove the given instances from the cluster.

    Datanodes among the instances are decommissioned through the manager
    client first; then services on all given hosts are stopped, each
    instance is polled until its services report as stopped, and finally
    every host is deleted from the manager.

    Timeouts only produce warnings; the function carries on to the next
    step regardless.

    :param cluster: cluster the instances belong to.
    :param instances: instances to remove; ``fqdn()`` and ``remote()``
        are called on them.
    """
    dec_hosts = [i.fqdn() for i in instances]
    dn_hosts = [dn.fqdn() for dn in u.get_datanodes(cluster)]
    tt_hosts = [dn.fqdn() for dn in u.get_tasktrackers(cluster)]

    client = c.IntelClient(u.get_instance(cluster, 'manager'), cluster.name)

    # Hosts to remove that are also datanodes of the cluster.
    dec_dn_hosts = []
    for dec_host in dec_hosts:
        if dec_host in dn_hosts:
            dec_dn_hosts.append(dec_host)

    if dec_dn_hosts:
        client.services.hdfs.decommission_nodes(dec_dn_hosts)

        #TODO(alazarev) make timeout configurable (bug #1262897)
        timeout = 14400  # 4 hours
        # NOTE(review): cur_time is initialised once, outside the per-host
        # loop, so the 4 hour budget is shared by all hosts. Once it is
        # used up, the remaining hosts skip the while body and go straight
        # to the warning without their status being checked - confirm this
        # is intended.
        cur_time = 0
        for host in dec_dn_hosts:
            while cur_time < timeout:
                # The status string compared here uses the spelling
                # 'Decomissioned'.
                if client.services.hdfs.get_datanode_status(
                        host) == 'Decomissioned':
                    break
                context.sleep(5)
                cur_time += 5
            else:
                # while/else: runs only when the loop ended without break.
                LOG.warn("Failed to decomission node '%s' of cluster '%s' "
                         "in %s minutes" % (host, cluster.name, timeout / 60))

    client.nodes.stop(dec_hosts)

    # wait stop services
    #TODO(alazarev) make timeout configurable (bug #1262897)
    timeout = 600  # 10 minutes
    # NOTE(review): as above, the time budget is shared across instances.
    cur_time = 0
    for instance in instances:
        while cur_time < timeout:
            stopped = True
            if instance.fqdn() in dn_hosts:
                # raise_when_error=False: presumably keeps a non-zero exit
                # status from raising - verify against the remote API.
                code, out = instance.remote().execute_command(
                    'sudo /sbin/service hadoop-datanode status',
                    raise_when_error=False)
                if out.strip() != 'datanode is stopped':
                    stopped = False
                # A stale pid file is removed here; the node still counts
                # as not stopped for this pass and is re-checked on the
                # next iteration.
                if out.strip() == 'datanode dead but pid file exists':
                    instance.remote().execute_command(
                        'sudo rm -f '
                        '/var/run/hadoop/hadoop-hadoop-datanode.pid')
            if instance.fqdn() in tt_hosts:
                code, out = instance.remote().execute_command(
                    'sudo /sbin/service hadoop-tasktracker status',
                    raise_when_error=False)
                if out.strip() != 'tasktracker is stopped':
                    stopped = False
            if stopped:
                break
            else:
                context.sleep(5)
                cur_time += 5
        else:
            # while/else: runs only when the loop ended without break.
            # NOTE(review): the instance object itself is formatted into
            # the message, not its fqdn().
            LOG.warn("Failed to stop services on node '%s' of cluster '%s' "
                     "in %s minutes" % (instance, cluster.name, timeout / 60))

    # Delete every host from the manager, whether or not the waits above
    # timed out.
    for node in dec_hosts:
        LOG.info("Deleting node '%s' on cluster '%s'" % (node, cluster.name))
        client.nodes.delete(node)
Example #9
0
def install_manager(cluster):
    """Install the IDH manager on the cluster's 'manager' instance.

    Downloads the IDH tarball named in the 'general' cluster configs,
    unpacks it, writes the installer configuration, runs the silent
    installer and then polls port 9443 of the manager until it accepts
    a connection.

    :param cluster: cluster object; its ``cluster_configs`` and ``name``
        are read and its 'manager' instance runs the remote commands.
    :raises RuntimeError: if downloading or unpacking the tarball fails.
    :raises iex.IntelPluginException: if the manager does not accept a
        connection on port 9443 before the timeout expires.
    """
    LOG.info("Starting Install Manager Process")
    mng_instance = u.get_instance(cluster, 'manager')
    general_configs = cluster.cluster_configs.get('general')

    idh_tarball_path = c_helper.get_config_value(
        general_configs, c_helper.IDH_TARBALL_URL)

    idh_tarball_filename = idh_tarball_path.rsplit('/', 1)[-1]
    # partition() leaves the name intact when the '.tar.gz' suffix is
    # missing; slicing with find() would silently drop the last character
    # because find() returns -1 in that case.
    idh_dir = idh_tarball_filename.partition('.tar.gz')[0]
    LOG.info("IDH tgz will be retrieved from: '%s'", idh_tarball_path)

    idh_repo = c_helper.get_config_value(
        general_configs, c_helper.IDH_REPO_URL)

    os_repo = c_helper.get_config_value(
        general_configs, c_helper.OS_REPO_URL)

    idh_install_cmd = 'sudo ./%s/install.sh --mode=silent 2>&1' % idh_dir

    with mng_instance.remote() as r:
        LOG.info("Download IDH manager ")
        try:
            r.execute_command('curl -O %s 2>&1' % idh_tarball_path)
        except Exception as e:
            # Exceptions do not interpolate their arguments the way the
            # logging calls do, so build the message explicitly.
            raise RuntimeError(
                "Unable to download IDH manager from %s: %s"
                % (idh_tarball_path, e))

        # unpack archive
        LOG.info("Unpack manager %s ", idh_tarball_filename)
        try:
            r.execute_command('tar xzf %s 2>&1' % idh_tarball_filename)
        except Exception as e:
            raise RuntimeError(
                "Unable to unpack tgz %s: %s" % (idh_tarball_filename, e))

        # install idh
        LOG.debug("Install manager with %s : ", idh_install_cmd)
        inst_conf = _INST_CONF_TEMPLATE % (os_repo, idh_repo)
        r.write_file_to('%s/ui-installer/conf' % idh_dir, inst_conf)
        #TODO(alazarev) make timeout configurable (bug #1262897)
        r.execute_command(idh_install_cmd, timeout=3600)

        # fix nginx permissions bug
        r.execute_command('sudo chmod o+x /var/lib/nginx/ /var/lib/nginx/tmp '
                          '/var/lib/nginx/tmp/client_body')

    # wait for the idh manager to start
    #TODO(alazarev) make timeout configurable (bug #1262897)
    timeout = 600
    poll_interval = 2
    LOG.debug("Waiting %s seconds for Manager to start : ", timeout)
    # Count down on a separate variable so the configured timeout is
    # still available for the failure message.
    remaining = timeout
    while remaining > 0:
        try:
            # Only reachability matters, so close the probe connection
            # right away instead of leaking the socket.
            telnetlib.Telnet(mng_instance.management_ip, 9443).close()
            break
        except IOError:
            remaining -= poll_interval
            context.sleep(poll_interval)
    else:
        # Reached only when the countdown expired without a connection.
        message = ("IDH Manager failed to start in %s minutes on node '%s' "
                   "of cluster '%s'"
                   % (timeout // 60, mng_instance.management_ip, cluster.name))
        LOG.error(message)
        raise iex.IntelPluginException(message)
Example #10
0
 def get_oozie_server(self, cluster):
     """Return the result of looking up the "oozie" process in *cluster*."""
     oozie_instance = u.get_instance(cluster, "oozie")
     return oozie_instance
Example #11
0
 def get_oozie_server(self, cluster):
     """Return the result of looking up "oozie_server" in *cluster*."""
     oozie_instance = u.get_instance(cluster, "oozie_server")
     return oozie_instance
Example #12
0
 def test_get_instance(self):
     # A process name that matches nothing yields None.
     missing = u.get_instance(self.c1, 'wrong-process')
     self.assertIsNone(missing)

     # The 'nn' lookup resolves to the first instance of node group ng1.
     namenode = u.get_instance(self.c1, 'nn')
     self.assertEqual(namenode, self.ng1.instances[0])

     # The 'dn' lookup is expected to raise; presumably the fixture holds
     # more than one 'dn' instance - confirm against setUp.
     with self.assertRaises(ex.InvalidComponentCountException):
         u.get_instance(self.c1, 'dn')