def scale_cluster(cluster, instances):
    """Attach newly provisioned instances to a running IDH cluster.

    Registers the hosts with the Intel manager, installs the Hadoop
    software on them, adds DataNode/TaskTracker roles where appropriate,
    pushes node configuration and starts the affected services.
    """
    new_hosts = [instance.fqdn() for instance in instances]
    datanode_hosts = [node.fqdn() for node in u.get_datanodes(cluster)]
    tasktracker_hosts = [node.fqdn() for node in u.get_tasktrackers(cluster)]

    # Split the incoming hosts by the role they are meant to carry.
    dn_to_add = [host for host in new_hosts if host in datanode_hosts]
    tt_to_add = [host for host in new_hosts if host in tasktracker_hosts]

    client = c.IntelClient(u.get_instance(cluster, 'manager'), cluster.name)

    rack = '/Default'
    client.nodes.add(new_hosts, rack, 'hadoop',
                     '/home/hadoop/.ssh/id_rsa')
    client.cluster.install_software(new_hosts)

    if tt_to_add:
        client.services.mapred.add_nodes('TaskTracker', tt_to_add)
    if dn_to_add:
        client.services.hdfs.add_nodes('DataNode', dn_to_add)

    client.nodes.config()

    if dn_to_add:
        client.services.hdfs.start()
    if tt_to_add:
        client.services.mapred.start()
def install_cluster(cluster):
    """Create, configure and deploy an IDH cluster on all instances.

    Drives the Intel manager client through the full install sequence:
    cluster creation, node registration, software install, service
    configuration, deployment and an initial HDFS format.
    """
    manager = u.get_instance(cluster, 'manager')
    hosts = list({instance.fqdn() for instance in u.get_instances(cluster)})

    client = c.IntelClient(manager, cluster.name)

    LOG.info("Create cluster")
    client.cluster.create()

    LOG.info("Add nodes to cluster")
    rack = '/Default'
    client.nodes.add(hosts, rack, 'hadoop', '/home/hadoop/.ssh/id_rsa')

    LOG.info("Install software")
    client.cluster.install_software(hosts)

    LOG.info("Configure services")
    _configure_services(client, cluster)

    LOG.info("Deploy cluster")
    client.nodes.config(force=True)

    LOG.info("Provisioning configs")
    # cinder and ephemeral drive support
    _configure_storage(client, cluster)
    # swift support
    _configure_swift(client, cluster)
    # user configs
    _add_user_params(client, cluster)

    LOG.info("Format HDFS")
    client.services.hdfs.format()
def scale_cluster(cluster, instances):
    """Attach newly provisioned instances to a running IDH cluster.

    Connects to the manager by its management IP, registers the hosts
    using the authorized-key file recorded in ``cluster.extra``,
    installs the software, adds DataNode/TaskTracker roles as needed,
    pushes node configuration and starts the affected services.
    """
    new_hosts = [instance.fqdn() for instance in instances]
    datanode_hosts = [node.fqdn() for node in u.get_datanodes(cluster)]
    tasktracker_hosts = [node.fqdn() for node in u.get_tasktrackers(cluster)]

    # Split the incoming hosts by the role they are meant to carry.
    dn_to_add = [host for host in new_hosts if host in datanode_hosts]
    tt_to_add = [host for host in new_hosts if host in tasktracker_hosts]

    manager_ip = u.get_instance(cluster, 'manager').management_ip
    client = c.IntelClient(manager_ip, cluster.name)

    rack = '/Default'
    client.nodes.add(new_hosts, rack, 'hadoop',
                     cluster.extra['manager_authzkeyfile_path'])
    client.cluster.install_software(new_hosts)

    if tt_to_add:
        client.services.mapred.add_nodes('TaskTracker', tt_to_add)
    if dn_to_add:
        client.services.hdfs.add_nodes('DataNode', dn_to_add)

    client.nodes.config()

    if dn_to_add:
        client.services.hdfs.start()
    if tt_to_add:
        client.services.mapred.start()
def install_cluster(cluster):
    """Create, configure and deploy an IDH cluster on all instances.

    Connects to the manager by its management IP and drives the full
    install sequence: cluster creation, node registration, software
    install, service configuration, deployment and an initial HDFS
    format.
    """
    manager = u.get_instance(cluster, 'manager')
    manager_ip = manager.management_ip
    hosts = list({instance.fqdn() for instance in u.get_instances(cluster)})

    client = c.IntelClient(manager_ip, cluster.name)

    LOG.info("Create cluster")
    client.cluster.create()

    LOG.info("Add nodes to cluster")
    rack = '/Default'
    client.nodes.add(hosts, rack, 'hadoop', '/home/hadoop/.ssh/id_rsa')

    LOG.info("Install software")
    client.cluster.install_software(hosts)

    LOG.info("Configure services")
    _configure_services(client, cluster)

    LOG.info("Deploy cluster")
    client.nodes.config(force=True)

    LOG.info("Provisioning configs")
    # cinder and ephemeral drive support
    _configure_storage(client, cluster)
    # swift support
    _configure_swift(client, cluster)
    # user configs
    _add_user_params(client, cluster)

    LOG.info("Format HDFS")
    client.services.hdfs.format()
def start_cluster(cluster):
    """Start the Hadoop services of the cluster, plus Hive and Oozie
    when those processes are present."""
    manager_ip = u.get_instance(cluster, 'manager').management_ip
    client = c.IntelClient(manager_ip, cluster.name)

    LOG.debug("Starting hadoop services")
    client.services.hdfs.start()
    client.services.mapred.start()

    if u.get_hiveserver(cluster):
        client.services.hive.start()

    if u.get_oozie(cluster):
        LOG.info("Setup oozie")
        _setup_oozie(cluster)
        client.services.oozie.start()
def start_cluster(cluster):
    """Start the Hadoop services of the cluster.

    MapReduce is only started when a JobTracker exists; Hive and Oozie
    are started when those processes are present.
    """
    manager = u.get_instance(cluster, 'manager')
    client = c.IntelClient(manager, cluster.name)

    LOG.debug("Starting hadoop services")
    client.services.hdfs.start()

    if u.get_jobtracker(cluster):
        client.services.mapred.start()

    if u.get_hiveserver(cluster):
        client.services.hive.start()

    if u.get_oozie(cluster):
        LOG.info("Setup oozie")
        _setup_oozie(cluster)
        client.services.oozie.start()
def install_manager(cluster):
    """Download, unpack and install the IDH manager on the manager node.

    Retrieves the IDH tarball on the 'manager' instance, runs the silent
    installer with the configured OS/IDH repositories, then waits for
    the manager endpoint (port 9443) to start answering.

    :param cluster: cluster whose 'manager' instance will host IDH
    :raises RuntimeError: if the tarball cannot be downloaded or unpacked
    :raises iex.IntelPluginException: if the manager does not start in time
    """
    LOG.info("Starting Install Manager Process")
    mng_instance = u.get_instance(cluster, 'manager')

    idh_tarball_path = c_helper.get_config_value(
        cluster.cluster_configs.get('general'), c_helper.IDH_TARBALL_URL)

    idh_tarball_filename = idh_tarball_path.rsplit('/', 1)[-1]
    idh_dir = idh_tarball_filename[:idh_tarball_filename.find('.tar.gz')]
    LOG.info("IDH tgz will be retrieved from: \'%s\'", idh_tarball_path)

    idh_repo = c_helper.get_config_value(
        cluster.cluster_configs.get('general'), c_helper.IDH_REPO_URL)

    os_repo = c_helper.get_config_value(
        cluster.cluster_configs.get('general'), c_helper.OS_REPO_URL)

    idh_install_cmd = 'sudo ./%s/install.sh --mode=silent 2>&1' % idh_dir

    with mng_instance.remote() as r:
        LOG.info("Download IDH manager ")
        try:
            r.execute_command('curl -O %s 2>&1' % idh_tarball_path)
        except Exception as e:
            # interpolate explicitly: extra positional args passed to
            # RuntimeError are stored, not formatted into the message
            raise RuntimeError("Unable to download IDH manager from %s: %s"
                               % (idh_tarball_path, e))

        # unpack archive
        LOG.info("Unpack manager %s ", idh_tarball_filename)
        try:
            r.execute_command('tar xzf %s 2>&1' % idh_tarball_filename)
        except Exception as e:
            raise RuntimeError("Unable to unpack tgz %s: %s"
                               % (idh_tarball_filename, e))

        # install idh
        LOG.debug("Install manager with %s : ", idh_install_cmd)
        inst_conf = _INST_CONF_TEMPLATE % (os_repo, idh_repo)
        r.write_file_to('%s/ui-installer/conf' % idh_dir, inst_conf)
        #TODO(alazarev) make timeout configurable (bug #1262897)
        r.execute_command(idh_install_cmd, timeout=3600)

        # fix nginx permissions bug
        r.execute_command('sudo chmod o+x /var/lib/nginx/ /var/lib/nginx/tmp '
                          '/var/lib/nginx/tmp/client_body')

        # waiting start idh manager
        #TODO(alazarev) make timeout configurable (bug #1262897)
        timeout = 600
        waited = 0
        LOG.debug("Waiting %s seconds for Manager to start : ", timeout)
        while waited < timeout:
            try:
                telnetlib.Telnet(mng_instance.management_ip, 9443)
                break
            except IOError:
                waited += 2
                context.sleep(2)
        else:
            # report the configured timeout; the original decremented the
            # countdown variable itself, so the message always said
            # "0 minutes" on failure
            message = ("IDH Manager failed to start in %s minutes on node "
                       "'%s' of cluster '%s'"
                       % (timeout / 60, mng_instance.management_ip,
                          cluster.name))
            LOG.error(message)
            raise iex.IntelPluginException(message)
def decommission_nodes(cluster, instances):
    """Decommission the given instances and remove them from the cluster.

    Flow: decommission any DataNodes among *instances* via the manager
    and poll until HDFS reports them decommissioned, then stop all the
    nodes, wait for the datanode/tasktracker system services to report
    "stopped", and finally delete the nodes from the manager.
    """
    dec_hosts = [i.fqdn() for i in instances]
    dn_hosts = [dn.fqdn() for dn in u.get_datanodes(cluster)]
    tt_hosts = [dn.fqdn() for dn in u.get_tasktrackers(cluster)]

    client = c.IntelClient(u.get_instance(cluster, 'manager'), cluster.name)

    # Only hosts that actually run a DataNode need an HDFS decommission.
    dec_dn_hosts = []
    for dec_host in dec_hosts:
        if dec_host in dn_hosts:
            dec_dn_hosts.append(dec_host)

    if dec_dn_hosts:
        client.services.hdfs.decommission_nodes(dec_dn_hosts)

        #TODO(alazarev) make timeout configurable (bug #1262897)
        timeout = 14400  # 4 hours
        cur_time = 0
        for host in dec_dn_hosts:
            # NOTE(review): cur_time is shared across hosts, so the timeout
            # bounds the TOTAL wait for all nodes, not per-node — presumably
            # intentional; confirm.
            while cur_time < timeout:
                # 'Decomissioned' (sic) — assumed to match the exact status
                # string the manager API returns; verify before "fixing".
                if client.services.hdfs.get_datanode_status(
                        host) == 'Decomissioned':
                    break
                context.sleep(5)
                cur_time += 5
            else:
                LOG.warn("Failed to decomission node '%s' of cluster '%s' "
                         "in %s minutes" % (host, cluster.name, timeout / 60))

    client.nodes.stop(dec_hosts)

    # wait stop services
    #TODO(alazarev) make timeout configurable (bug #1262897)
    timeout = 600  # 10 minutes
    cur_time = 0
    for instance in instances:
        # Same shared-budget pattern as above: one 10-minute budget for
        # all instances combined.
        while cur_time < timeout:
            stopped = True
            if instance.fqdn() in dn_hosts:
                code, out = instance.remote().execute_command(
                    'sudo /sbin/service hadoop-datanode status',
                    raise_when_error=False)
                if out.strip() != 'datanode is stopped':
                    stopped = False
                # Dead process with a stale pid file: remove the pid file
                # so the service status can settle to "stopped".
                if out.strip() == 'datanode dead but pid file exists':
                    instance.remote().execute_command(
                        'sudo rm -f '
                        '/var/run/hadoop/hadoop-hadoop-datanode.pid')
            if instance.fqdn() in tt_hosts:
                code, out = instance.remote().execute_command(
                    'sudo /sbin/service hadoop-tasktracker status',
                    raise_when_error=False)
                if out.strip() != 'tasktracker is stopped':
                    stopped = False
            if stopped:
                break
            else:
                context.sleep(5)
                cur_time += 5
        else:
            LOG.warn("Failed to stop services on node '%s' of cluster '%s' "
                     "in %s minutes" % (instance, cluster.name, timeout / 60))

    # All services down (or timed out): remove the nodes from the manager.
    for node in dec_hosts:
        LOG.info("Deleting node '%s' on cluster '%s'" % (node, cluster.name))
        client.nodes.delete(node)
def install_manager(cluster):
    """Download, unpack and install the IDH manager on the manager node.

    Retrieves the IDH tarball on the 'manager' instance, runs the silent
    installer with the configured OS/IDH repositories, then waits for
    the manager endpoint (port 9443) to start answering.

    :param cluster: cluster whose 'manager' instance will host IDH
    :raises RuntimeError: if the tarball cannot be downloaded or unpacked
    :raises iex.IntelPluginException: if the manager does not start in time
    """
    LOG.info("Starting Install Manager Process")
    mng_instance = u.get_instance(cluster, 'manager')

    idh_tarball_path = c_helper.get_config_value(
        cluster.cluster_configs.get('general'), c_helper.IDH_TARBALL_URL)

    idh_tarball_filename = idh_tarball_path.rsplit('/', 1)[-1]
    idh_dir = idh_tarball_filename[:idh_tarball_filename.find('.tar.gz')]
    LOG.info("IDH tgz will be retrieved from: \'%s\'", idh_tarball_path)

    idh_repo = c_helper.get_config_value(
        cluster.cluster_configs.get('general'), c_helper.IDH_REPO_URL)

    os_repo = c_helper.get_config_value(
        cluster.cluster_configs.get('general'), c_helper.OS_REPO_URL)

    idh_install_cmd = 'sudo ./%s/install.sh --mode=silent 2>&1' % idh_dir

    with mng_instance.remote() as r:
        LOG.info("Download IDH manager ")
        try:
            r.execute_command('curl -O %s 2>&1' % idh_tarball_path)
        except Exception as e:
            # interpolate explicitly: extra positional args passed to
            # RuntimeError are stored, not formatted into the message
            raise RuntimeError("Unable to download IDH manager from %s: %s"
                               % (idh_tarball_path, e))

        # unpack archive
        LOG.info("Unpack manager %s ", idh_tarball_filename)
        try:
            r.execute_command('tar xzf %s 2>&1' % idh_tarball_filename)
        except Exception as e:
            raise RuntimeError("Unable to unpack tgz %s: %s"
                               % (idh_tarball_filename, e))

        # install idh
        LOG.debug("Install manager with %s : ", idh_install_cmd)
        inst_conf = _INST_CONF_TEMPLATE % (os_repo, idh_repo)
        r.write_file_to('%s/ui-installer/conf' % idh_dir, inst_conf)
        #TODO(alazarev) make timeout configurable (bug #1262897)
        r.execute_command(idh_install_cmd, timeout=3600)

        # fix nginx permissions bug
        r.execute_command('sudo chmod o+x /var/lib/nginx/ /var/lib/nginx/tmp '
                          '/var/lib/nginx/tmp/client_body')

        # waiting start idh manager
        #TODO(alazarev) make timeout configurable (bug #1262897)
        timeout = 600
        waited = 0
        LOG.debug("Waiting %s seconds for Manager to start : ", timeout)
        while waited < timeout:
            try:
                telnetlib.Telnet(mng_instance.management_ip, 9443)
                break
            except IOError:
                waited += 2
                context.sleep(2)
        else:
            # report the configured timeout; the original decremented the
            # countdown variable itself, so the message always said
            # "0 minutes" on failure
            message = ("IDH Manager failed to start in %s minutes on node "
                       "'%s' of cluster '%s'"
                       % (timeout / 60, mng_instance.management_ip,
                          cluster.name))
            LOG.error(message)
            raise iex.IntelPluginException(message)
def get_oozie_server(self, cluster):
    """Return the instance of *cluster* running the "oozie" process."""
    oozie_instance = u.get_instance(cluster, "oozie")
    return oozie_instance
def get_oozie_server(self, cluster):
    """Return the instance of *cluster* running the "oozie_server" process."""
    oozie_instance = u.get_instance(cluster, "oozie_server")
    return oozie_instance
def test_get_instance(self):
    """get_instance: None for an unknown process, the single matching
    instance for a one-instance process, and an error when several
    instances run the process."""
    # unknown process name -> no instance found
    self.assertIsNone(u.get_instance(self.c1, 'wrong-process'))

    # exactly one instance runs 'nn'
    self.assertEqual(u.get_instance(self.c1, 'nn'),
                     self.ng1.instances[0])

    # several instances run 'dn' -> ambiguous, must raise
    with self.assertRaises(ex.InvalidComponentCountException):
        u.get_instance(self.c1, 'dn')