def test_get_instances(self):
    self.assertEqual(len(u.get_instances(self.c1)), 5)
    self.assertListEqual(u.get_instances(self.c1, "wrong-process"), [])
    self.assertListEqual(u.get_instances(self.c1, "nn"),
                         self.ng1.instances)
    instances = list(self.ng2.instances)
    instances += self.ng3.instances
    self.assertListEqual(u.get_instances(self.c1, "dn"), instances)

def test_get_instances(self):
    self.assertEqual(len(u.get_instances(self.c1)), 5)
    self.assertListEqual(u.get_instances(self.c1, 'wrong-process'), [])
    self.assertListEqual(u.get_instances(self.c1, 'nn'),
                         self.ng1.instances)
    self.assertListEqual(u.get_instances(self.c1, 'dn'),
                         self.ng2.instances + self.ng3.instances)

def test_get_instances(self):
    self.assertEqual(len(u.get_instances(self.c1)), 5)
    self.assertListEqual(u.get_instances(self.c1, 'wrong-process'), [])
    self.assertListEqual(u.get_instances(self.c1, 'nn'),
                         self.ng1.instances)
    instances = list(self.ng2.instances)
    instances += self.ng3.instances
    self.assertListEqual(u.get_instances(self.c1, 'dn'), instances)

def install_cluster(cluster):
    mng_instance = u.get_instance(cluster, 'manager')
    mng_ip = mng_instance.management_ip

    all_hosts = list(set([i.fqdn() for i in u.get_instances(cluster)]))

    client = c.IntelClient(mng_ip, cluster.name)

    LOG.info("Create cluster")
    client.cluster.create()

    LOG.info("Add nodes to cluster")
    rack = '/Default'
    client.nodes.add(all_hosts, rack, 'hadoop',
                     '/home/hadoop/.ssh/id_rsa')

    LOG.info("Install software")
    client.cluster.install_software(all_hosts)

    LOG.info("Configure services")
    _configure_services(client, cluster)

    LOG.info("Deploy cluster")
    client.nodes.config(force=True)

    LOG.info("Provisioning configs")
    # cinder and ephemeral drive support
    _configure_storage(client, cluster)
    # swift support
    _configure_swift(client, cluster)
    # user configs
    _add_user_params(client, cluster)

    LOG.info("Format HDFS")
    client.services.hdfs.format()

def install_cluster(cluster):
    mng_instance = u.get_instance(cluster, 'manager')

    all_hosts = list(set([i.fqdn() for i in u.get_instances(cluster)]))

    client = c.IntelClient(mng_instance, cluster.name)

    LOG.info("Create cluster")
    client.cluster.create()

    LOG.info("Add nodes to cluster")
    rack = '/Default'
    client.nodes.add(all_hosts, rack, 'hadoop',
                     '/home/hadoop/.ssh/id_rsa')

    LOG.info("Install software")
    client.cluster.install_software(all_hosts)

    LOG.info("Configure services")
    _configure_services(client, cluster)

    LOG.info("Deploy cluster")
    client.nodes.config(force=True)

    LOG.info("Provisioning configs")
    # cinder and ephemeral drive support
    _configure_storage(client, cluster)
    # swift support
    _configure_swift(client, cluster)
    # user configs
    _add_user_params(client, cluster)

    LOG.info("Format HDFS")
    client.services.hdfs.format()

def _set_cluster_info(self, cluster):
    mng = u.get_instances(cluster, 'manager')[0]
    nn = u.get_namenode(cluster)
    jt = u.get_jobtracker(cluster)
    oozie = u.get_oozie(cluster)

    #TODO(alazarev) make port configurable (bug #1262895)
    info = {'IDH Manager': {
        'Web UI': 'https://%s:9443' % mng.management_ip
    }}

    if jt:
        #TODO(alazarev) make port configurable (bug #1262895)
        info['MapReduce'] = {
            'Web UI': 'http://%s:50030' % jt.management_ip
        }
        #TODO(alazarev) make port configurable (bug #1262895)
        info['MapReduce']['JobTracker'] = '%s:54311' % jt.hostname()
    if nn:
        #TODO(alazarev) make port configurable (bug #1262895)
        info['HDFS'] = {
            'Web UI': 'http://%s:50070' % nn.management_ip
        }
        #TODO(alazarev) make port configurable (bug #1262895)
        info['HDFS']['NameNode'] = 'hdfs://%s:8020' % nn.hostname()

    if oozie:
        #TODO(alazarev) make port configurable (bug #1262895)
        info['JobFlow'] = {
            'Oozie': 'http://%s:11000' % oozie.management_ip
        }

    ctx = context.ctx()
    conductor.cluster_update(ctx, cluster, {'info': info})

def _get_cluster_hosts_information(host, cluster):
    for c in conductor.cluster_get_all(context.ctx()):
        if c.id == cluster.id:
            continue

        for i in u.get_instances(c):
            if i.instance_name == host:
                return g.generate_etc_hosts(c)

    return None

def _push_configs_to_nodes(self, cluster, extra, new_instances):
    all_instances = utils.get_instances(cluster)
    with context.ThreadGroup() as tg:
        for instance in all_instances:
            # newly added nodes get a full configuration,
            # existing nodes are only reconfigured
            if instance in new_instances:
                tg.spawn('vanilla-configure-%s' % instance.instance_name,
                         self._push_configs_to_new_node, cluster,
                         extra, instance)
            else:
                tg.spawn('vanilla-reconfigure-%s' % instance.instance_name,
                         self._push_configs_to_existing_node, cluster,
                         extra, instance)

def _push_configs_to_nodes(self, cluster, instances=None):
    extra = self._extract_configs_to_extra(cluster)

    if instances is None:
        instances = utils.get_instances(cluster)

    for inst in instances:
        ng_extra = extra[inst.node_group.id]
        files = {
            '/etc/hadoop/core-site.xml': ng_extra['xml']['core-site'],
            '/etc/hadoop/mapred-site.xml': ng_extra['xml']['mapred-site'],
            '/etc/hadoop/hdfs-site.xml': ng_extra['xml']['hdfs-site'],
            '/tmp/savanna-hadoop-init.sh': ng_extra['setup_script']
        }

        with remote.get_remote(inst) as r:
            # TODO(aignatov): sudo chown is wrong solution. But it works.
            r.execute_command(
                'sudo chown -R $USER:$USER /etc/hadoop'
            )
            r.execute_command(
                'sudo chown -R $USER:$USER /opt/oozie/conf'
            )
            r.write_files_to(files)
            r.execute_command(
                'sudo chmod 0500 /tmp/savanna-hadoop-init.sh'
            )
            r.execute_command(
                'sudo /tmp/savanna-hadoop-init.sh '
                '>> /tmp/savanna-hadoop-init.log 2>&1')

    nn = utils.get_namenode(cluster)
    jt = utils.get_jobtracker(cluster)

    with remote.get_remote(nn) as r:
        r.write_file_to('/etc/hadoop/dn.incl',
                        utils.generate_fqdn_host_names(
                            utils.get_datanodes(cluster)))
    if jt:
        with remote.get_remote(jt) as r:
            r.write_file_to('/etc/hadoop/tt.incl',
                            utils.generate_fqdn_host_names(
                                utils.get_tasktrackers(cluster)))

    oozie = utils.get_oozie(cluster)
    if oozie:
        with remote.get_remote(oozie) as r:
            r.write_file_to('/opt/oozie/conf/oozie-site.xml',
                            extra[oozie.node_group.id]
                            ['xml']['oozie-site'])

def configure_cluster_for_hdfs(cluster, data_source):
    host = urlparse.urlparse(data_source.url).hostname
    etc_hosts_information = _get_cluster_hosts_information(host, cluster)
    if etc_hosts_information is None:
        # The IP address hasn't been resolved; the last chance is that
        # the VM can resolve it itself.
        return

    create_etc_host = 'sudo "cat /tmp/etc-hosts-update '
    create_etc_host += '/etc/hosts > /tmp/etc-hosts"'
    copy_etc_host = 'sudo "cat /tmp/etc-hosts > /etc/hosts"'

    for inst in u.get_instances(cluster):
        with inst.remote as r:
            r.write_file_to('/tmp/etc-hosts-update', etc_hosts_information)
            r.execute_command(create_etc_host)
            r.execute_command(copy_etc_host)

def start_cluster(self, cluster):
    nn_instance = utils.get_namenode(cluster)
    with remote.get_remote(nn_instance) as r:
        run.format_namenode(r)
        run.start_processes(r, "namenode")

    for snn in utils.get_secondarynamenodes(cluster):
        run.start_processes(remote.get_remote(snn), "secondarynamenode")

    jt_instance = utils.get_jobtracker(cluster)
    if jt_instance:
        run.start_processes(remote.get_remote(jt_instance), "jobtracker")

    self._start_tt_dn_processes(utils.get_instances(cluster))

    self._await_datanodes(cluster)

    LOG.info("Hadoop services in cluster %s have been started" %
             cluster.name)

    oozie = utils.get_oozie(cluster)
    if oozie:
        with remote.get_remote(oozie) as r:
            if c_helper.is_mysql_enable(cluster):
                run.mysql_start(r, oozie)
                run.oozie_create_db(r)
            run.oozie_share_lib(r, nn_instance.hostname())
            run.start_oozie(r)
            LOG.info("Oozie service at '%s' has been started",
                     nn_instance.hostname())

    hive_server = utils.get_hiveserver(cluster)
    if hive_server:
        with remote.get_remote(nn_instance) as r:
            run.hive_create_warehouse_dir(r)
        if c_helper.is_mysql_enable(cluster):
            with remote.get_remote(hive_server) as h:
                if not oozie or hive_server.hostname() != oozie.hostname():
                    run.mysql_start(h, hive_server)
                run.hive_create_db(h)
                run.hive_metastore_start(h)
                LOG.info("Hive Metastore server at %s has been started",
                         hive_server.hostname())

    LOG.info('Cluster %s has been started successfully' % cluster.name)
    self._set_cluster_info(cluster)

def start_cluster(self, cluster):
    instances = utils.get_instances(cluster)
    nn_instance = utils.get_namenode(cluster)
    jt_instance = utils.get_jobtracker(cluster)
    oozie = utils.get_oozie(cluster)
    hive_server = utils.get_hiveserver(cluster)

    with remote.get_remote(nn_instance) as r:
        run.format_namenode(r)
        run.start_processes(r, "namenode")

    for snn in utils.get_secondarynamenodes(cluster):
        run.start_processes(remote.get_remote(snn), "secondarynamenode")

    if jt_instance:
        run.start_processes(remote.get_remote(jt_instance), "jobtracker")

    self._start_tt_dn_processes(instances)

    LOG.info("Hadoop services in cluster %s have been started" %
             cluster.name)

    if oozie:
        with remote.get_remote(oozie) as r:
            if c_helper.is_mysql_enable(cluster):
                run.mysql_start(r, oozie)
                run.oozie_create_db(r)
            run.oozie_share_lib(r, nn_instance.hostname)
            run.start_oozie(r)
            LOG.info("Oozie service at '%s' has been started",
                     nn_instance.hostname)

    if hive_server:
        with remote.get_remote(nn_instance) as r:
            run.hive_create_warehouse_dir(r)
        if c_helper.is_mysql_enable(cluster):
            with remote.get_remote(hive_server) as h:
                if not oozie or hive_server.hostname != oozie.hostname:
                    run.mysql_start(h, hive_server)
                run.hive_create_db(h)
                run.hive_metastore_start(h)
                LOG.info("Hive Metastore server at %s has been started",
                         hive_server.hostname)

    LOG.info('Cluster %s has been started successfully' % cluster.name)
    self._set_cluster_info(cluster)

def configure_os(cluster):
    instances = u.get_instances(cluster)
    configure_os_from_instances(cluster, instances)

def configure_cluster(self, cluster):
    self._push_configs_to_nodes(cluster)
    self._write_hadoop_user_keys(cluster.private_key,
                                 utils.get_instances(cluster))

def configure_cluster(self, cluster):
    instances = utils.get_instances(cluster)
    self._setup_instances(cluster, instances)

def configure_cluster(self, cluster):
    self._push_configs_to_nodes(cluster)
    self._write_hadoop_user_keys(utils.get_instances(cluster),
                                 cluster.management_private_key,
                                 cluster.management_public_key)

def decommission_nodes(cluster, instances):
    dec_hosts = [i.fqdn() for i in instances]
    dn_hosts = [dn.fqdn() for dn in u.get_datanodes(cluster)]
    tt_hosts = [tt.fqdn() for tt in u.get_tasktrackers(cluster)]

    mng_ip = u.get_instances(cluster, 'manager')[0].management_ip
    client = c.IntelClient(mng_ip, cluster.name)

    dec_dn_hosts = []
    for dec_host in dec_hosts:
        if dec_host in dn_hosts:
            dec_dn_hosts.append(dec_host)

    if dec_dn_hosts:
        client.services.hdfs.decommission_nodes(dec_dn_hosts)

        #TODO(alazarev) make timeout configurable (bug #1262897)
        timeout = 14400  # 4 hours
        cur_time = 0
        for host in dec_dn_hosts:
            while cur_time < timeout:
                # note: 'Decomissioned' spelling matches the manager response
                if client.services.hdfs.get_datanode_status(
                        host) == 'Decomissioned':
                    break
                context.sleep(5)
                cur_time += 5
            else:
                LOG.warn("Failed to decommission node '%s' of cluster '%s' "
                         "in %s minutes" % (host, cluster.name, timeout / 60))

    client.nodes.stop(dec_hosts)

    # wait for the services to stop
    #TODO(alazarev) make timeout configurable (bug #1262897)
    timeout = 600  # 10 minutes
    cur_time = 0
    for instance in instances:
        while cur_time < timeout:
            stopped = True
            if instance.fqdn() in dn_hosts:
                code, out = instance.remote().execute_command(
                    'sudo /sbin/service hadoop-datanode status',
                    raise_when_error=False)
                if out.strip() != 'datanode is stopped':
                    stopped = False
                if out.strip() == 'datanode dead but pid file exists':
                    instance.remote().execute_command(
                        'sudo rm -f '
                        '/var/run/hadoop/hadoop-hadoop-datanode.pid')
            if instance.fqdn() in tt_hosts:
                code, out = instance.remote().execute_command(
                    'sudo /sbin/service hadoop-tasktracker status',
                    raise_when_error=False)
                if out.strip() != 'tasktracker is stopped':
                    stopped = False
            if stopped:
                break
            else:
                context.sleep(5)
                cur_time += 5
        else:
            LOG.warn("Failed to stop services on node '%s' of cluster '%s' "
                     "in %s minutes" % (instance, cluster.name, timeout / 60))

    for node in dec_hosts:
        LOG.info("Deleting node '%s' on cluster '%s'" % (node, cluster.name))
        client.nodes.delete(node)