def _configure_services(client, cluster):
    nn_host = u.get_namenode(cluster).fqdn()
    snn = u.get_secondarynamenodes(cluster)
    snn_host = snn[0].fqdn() if snn else None
    rm_host = u.get_resourcemanager(cluster).fqdn() if u.get_resourcemanager(
        cluster) else None
    hs_host = u.get_historyserver(cluster).fqdn() if u.get_historyserver(
        cluster) else None
    dn_hosts = [dn.fqdn() for dn in u.get_datanodes(cluster)]
    nm_hosts = [nm.fqdn() for nm in u.get_nodemanagers(cluster)]

    oozie_host = u.get_oozie(cluster).fqdn() if u.get_oozie(cluster) else None
    hive_host = u.get_hiveserver(cluster).fqdn() if u.get_hiveserver(
        cluster) else None

    services = []
    if u.get_namenode(cluster):
        services += ['hdfs']

    if u.get_resourcemanager(cluster):
        services += ['yarn']

    if oozie_host:
        services += ['oozie']
        services += ['pig']

    if hive_host:
        services += ['hive']

    LOG.debug("Add services: %s" % ', '.join(services))
    client.services.add(services)

    LOG.debug("Assign roles to hosts")
    client.services.hdfs.add_nodes('PrimaryNameNode', [nn_host])
    client.services.hdfs.add_nodes('DataNode', dn_hosts)

    if snn:
        client.services.hdfs.add_nodes('SecondaryNameNode', [snn_host])

    if oozie_host:
        client.services.oozie.add_nodes('Oozie', [oozie_host])

    if hive_host:
        client.services.hive.add_nodes('HiveServer', [hive_host])

    if rm_host:
        client.services.yarn.add_nodes('ResourceManager', [rm_host])
        client.services.yarn.add_nodes('NodeManager', nm_hosts)

    if hs_host:
        client.services.yarn.add_nodes('HistoryServer', [hs_host])

def generate_sahara_configs(cluster, node_group=None):
    nn_hostname = _get_hostname(utils.get_namenode(cluster))
    jt_hostname = _get_hostname(utils.get_jobtracker(cluster))

    oozie_hostname = _get_hostname(utils.get_oozie(cluster))
    hive_hostname = _get_hostname(utils.get_hiveserver(cluster))

    storage_path = node_group.storage_paths() if node_group else None

    # inserting common configs depends on provisioned VMs and HDFS placement
    # TODO(aignatov): should be moved to cluster context
    cfg = {
        'fs.default.name': 'hdfs://%s:8020' % nn_hostname,
        'dfs.name.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/namenode'),
        'dfs.data.dir': extract_hadoop_path(storage_path,
                                            '/lib/hadoop/hdfs/datanode'),
        'dfs.hosts': '/etc/hadoop/dn.incl',
        'dfs.hosts.exclude': '/etc/hadoop/dn.excl',
    }

    if jt_hostname:
        mr_cfg = {
            'mapred.job.tracker': '%s:8021' % jt_hostname,
            'mapred.system.dir': extract_hadoop_path(storage_path,
                                                     '/mapred/mapredsystem'),
            'mapred.local.dir': extract_hadoop_path(storage_path,
                                                    '/lib/hadoop/mapred'),
            'mapred.hosts': '/etc/hadoop/tt.incl',
            'mapred.hosts.exclude': '/etc/hadoop/tt.excl',
        }
        cfg.update(mr_cfg)

    if oozie_hostname:
        o_cfg = {
            'hadoop.proxyuser.hadoop.hosts': "localhost," + oozie_hostname,
            'hadoop.proxyuser.hadoop.groups': 'hadoop',
        }
        cfg.update(o_cfg)
        LOG.debug('Applied Oozie configs for core-site.xml')
        cfg.update(o_h.get_oozie_required_xml_configs())
        LOG.debug('Applied Oozie configs for oozie-site.xml')

    if hive_hostname:
        h_cfg = {
            'hive.warehouse.subdir.inherit.perms': True,
            'javax.jdo.option.ConnectionURL':
            'jdbc:derby:;databaseName=/opt/hive/metastore_db;create=true'
        }
        cfg.update(h_cfg)
        LOG.debug('Applied Hive config for hive metastore server')

    return cfg

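# Note: generate_sahara_configs() above calls a module-level _get_hostname()
# helper that is not shown in this section. A minimal sketch of what it is
# assumed to do (return the instance's hostname, or None when the instance
# is absent, so checks like "if jt_hostname:" work as written):
def _get_hostname(instance):
    return instance.hostname() if instance else None
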
def start_cluster(self, cluster):
    nn = utils.get_namenode(cluster)
    run.format_namenode(nn)
    run.start_hadoop_process(nn, 'namenode')

    rm = utils.get_resourcemanager(cluster)
    run.start_yarn_process(rm, 'resourcemanager')

    for dn in utils.get_datanodes(cluster):
        run.start_hadoop_process(dn, 'datanode')
    run.await_datanodes(cluster)

    for nm in utils.get_nodemanagers(cluster):
        run.start_yarn_process(nm, 'nodemanager')

    hs = utils.get_historyserver(cluster)
    if hs:
        run.start_historyserver(hs)

    oo = utils.get_oozie(cluster)
    if oo:
        run.start_oozie_process(oo)

    self._set_cluster_info(cluster)

def _set_cluster_info(self, cluster):
    nn = utils.get_namenode(cluster)
    rm = utils.get_resourcemanager(cluster)
    hs = utils.get_historyserver(cluster)
    oo = utils.get_oozie(cluster)

    info = {}

    if rm:
        info['YARN'] = {
            'Web UI': 'http://%s:%s' % (rm.management_ip, '8088'),
            'ResourceManager': 'http://%s:%s' % (rm.management_ip, '8032')
        }

    if nn:
        info['HDFS'] = {
            'Web UI': 'http://%s:%s' % (nn.management_ip, '50070'),
            'NameNode': 'hdfs://%s:%s' % (nn.hostname(), '9000')
        }

    if oo:
        info['JobFlow'] = {
            'Oozie': 'http://%s:%s' % (oo.management_ip, '11000')
        }

    if hs:
        info['MapReduce JobHistory Server'] = {
            'Web UI': 'http://%s:%s' % (hs.management_ip, '19888')
        }

    ctx = context.ctx()
    conductor.cluster_update(ctx, cluster, {'info': info})

def _set_cluster_info(self, cluster):
    mng = u.get_instances(cluster, 'manager')[0]
    nn = u.get_namenode(cluster)
    jt = u.get_jobtracker(cluster)
    oozie = u.get_oozie(cluster)

    # TODO(alazarev) make port configurable (bug #1262895)
    info = {'IDH Manager': {
        'Web UI': 'https://%s:9443' % mng.management_ip
    }}

    if jt:
        # TODO(alazarev) make port configurable (bug #1262895)
        info['MapReduce'] = {
            'Web UI': 'http://%s:50030' % jt.management_ip
        }
        # TODO(alazarev) make port configurable (bug #1262895)
        info['MapReduce']['JobTracker'] = '%s:54311' % jt.hostname()
    if nn:
        # TODO(alazarev) make port configurable (bug #1262895)
        info['HDFS'] = {
            'Web UI': 'http://%s:50070' % nn.management_ip
        }
        # TODO(alazarev) make port configurable (bug #1262895)
        info['HDFS']['NameNode'] = 'hdfs://%s:8020' % nn.hostname()

    if oozie:
        # TODO(alazarev) make port configurable (bug #1262895)
        info['JobFlow'] = {
            'Oozie': 'http://%s:11000' % oozie.management_ip
        }

    ctx = context.ctx()
    conductor.cluster_update(ctx, cluster, {'info': info})

def _configure_services(client, cluster):
    nn_host = u.get_namenode(cluster).fqdn()
    snn = u.get_secondarynamenodes(cluster)
    snn_host = snn[0].fqdn() if snn else None
    jt_host = u.get_jobtracker(cluster).fqdn() if u.get_jobtracker(
        cluster) else None
    dn_hosts = [dn.fqdn() for dn in u.get_datanodes(cluster)]
    tt_hosts = [tt.fqdn() for tt in u.get_tasktrackers(cluster)]

    oozie_host = u.get_oozie(cluster).fqdn() if u.get_oozie(cluster) else None
    hive_host = u.get_hiveserver(cluster).fqdn() if u.get_hiveserver(
        cluster) else None

    services = []
    if u.get_namenode(cluster):
        services += ['hdfs']

    if u.get_jobtracker(cluster):
        services += ['mapred']

    if oozie_host:
        services += ['oozie']
        services += ['pig']

    if hive_host:
        services += ['hive']

    LOG.debug("Add services: %s" % ', '.join(services))
    client.services.add(services)

    LOG.debug("Assign roles to hosts")
    client.services.hdfs.add_nodes('PrimaryNameNode', [nn_host])
    client.services.hdfs.add_nodes('DataNode', dn_hosts)

    if snn:
        client.services.hdfs.add_nodes('SecondaryNameNode', [snn_host])

    if oozie_host:
        client.services.oozie.add_nodes('Oozie', [oozie_host])

    if hive_host:
        client.services.hive.add_nodes('HiveServer', [hive_host])

    if jt_host:
        client.services.mapred.add_nodes('JobTracker', [jt_host])
        client.services.mapred.add_nodes('TaskTracker', tt_hosts)

def _get_hadoop_configs(node_group):
    cluster = node_group.cluster
    nn_hostname = utils.get_namenode(cluster).hostname()
    res_hostname = utils.get_resourcemanager(cluster).hostname()
    dirs = _get_hadoop_dirs(node_group)
    confs = {
        'Hadoop': {
            'fs.defaultFS': 'hdfs://%s:9000' % nn_hostname
        },
        'HDFS': {
            'dfs.namenode.name.dir': ','.join(dirs['hadoop_name_dirs']),
            'dfs.namenode.data.dir': ','.join(dirs['hadoop_data_dirs']),
            'dfs.hosts': '%s/dn-include' % HADOOP_CONF_DIR,
            'dfs.hosts.exclude': '%s/dn-exclude' % HADOOP_CONF_DIR
        },
        'YARN': {
            'yarn.nodemanager.aux-services': 'mapreduce_shuffle',
            'yarn.resourcemanager.hostname': '%s' % res_hostname,
            'yarn.resourcemanager.nodes.include-path': '%s/nm-include' % (
                HADOOP_CONF_DIR),
            'yarn.resourcemanager.nodes.exclude-path': '%s/nm-exclude' % (
                HADOOP_CONF_DIR)
        },
        'MapReduce': {
            'mapreduce.framework.name': 'yarn'
        },
    }

    oozie = utils.get_oozie(cluster)
    if oozie:
        hadoop_cfg = {
            'hadoop.proxyuser.hadoop.hosts': '*',
            'hadoop.proxyuser.hadoop.groups': 'hadoop'
        }
        confs['Hadoop'].update(hadoop_cfg)

        oozie_cfg = o_helper.get_oozie_required_xml_configs(HADOOP_CONF_DIR)
        if c_helper.is_mysql_enabled(cluster):
            oozie_cfg.update(o_helper.get_oozie_mysql_configs())

        confs['JobFlow'] = oozie_cfg

    if c_helper.get_config_value(c_helper.ENABLE_SWIFT.applicable_target,
                                 c_helper.ENABLE_SWIFT.name, cluster):
        swift_configs = {}
        for config in swift.get_swift_configs():
            swift_configs[config['name']] = config['value']

        confs['Hadoop'].update(swift_configs)

    if c_helper.is_data_locality_enabled(cluster):
        confs['Hadoop'].update(th.TOPOLOGY_CONFIG)
        confs['Hadoop'].update(
            {"topology.script.file.name": HADOOP_CONF_DIR + "/topology.sh"})

    return confs, c_helper.get_env_configs()

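# Note: _get_hadoop_dirs() is referenced above but not defined in this
# section. A minimal sketch, assuming it derives the HDFS name/data
# directories from the node group's attached storage paths (the exact
# directory layout used here is an assumption):
def _get_hadoop_dirs(node_group):
    storage_paths = node_group.storage_paths()
    return {
        'hadoop_name_dirs': [p + '/hdfs/namenode' for p in storage_paths],
        'hadoop_data_dirs': [p + '/hdfs/datanode' for p in storage_paths],
    }
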
def scale_cluster(self, cluster, instances):
    self._setup_instances(cluster, instances)

    run.refresh_nodes(remote.get_remote(
        utils.get_namenode(cluster)), "dfsadmin")
    jt = utils.get_jobtracker(cluster)
    if jt:
        run.refresh_nodes(remote.get_remote(jt), "mradmin")

    self._start_tt_dn_processes(instances)

def get_datanodes_status(cluster):
    statuses = {}
    namenode = u.get_namenode(cluster)
    status_regexp = r'^Hostname: (.*)\nDecommission Status : (.*)$'
    matcher = re.compile(status_regexp, re.MULTILINE)
    dfs_report = namenode.remote().execute_command(
        'sudo su - -c "hdfs dfsadmin -report" hadoop')[1]

    for host, status in matcher.findall(dfs_report):
        statuses[host] = status.lower()

    return statuses

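# For reference, the regexp in get_datanodes_status() matches the per-node
# blocks of "hdfs dfsadmin -report" output, which look roughly like the
# following (abridged, illustrative hostname):
#
#   Hostname: worker-001.example.local
#   Decommission Status : Normal
#
# so the function above would map 'worker-001.example.local' to 'normal'.
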
def start_cluster(self, cluster):
    nn_instance = utils.get_namenode(cluster)
    with remote.get_remote(nn_instance) as r:
        run.format_namenode(r)
        run.start_processes(r, "namenode")

    for snn in utils.get_secondarynamenodes(cluster):
        run.start_processes(remote.get_remote(snn), "secondarynamenode")

    jt_instance = utils.get_jobtracker(cluster)
    if jt_instance:
        run.start_processes(remote.get_remote(jt_instance), "jobtracker")

    self._start_tt_dn_processes(utils.get_instances(cluster))

    self._await_datanodes(cluster)

    LOG.info("Hadoop services in cluster %s have been started" %
             cluster.name)

    oozie = utils.get_oozie(cluster)
    if oozie:
        with remote.get_remote(oozie) as r:
            if c_helper.is_mysql_enable(cluster):
                run.mysql_start(r, oozie)
                run.oozie_create_db(r)
            run.oozie_share_lib(r, nn_instance.hostname())
            run.start_oozie(r)
            LOG.info("Oozie service at '%s' has been started",
                     oozie.hostname())

    hive_server = utils.get_hiveserver(cluster)
    if hive_server:
        with remote.get_remote(hive_server) as r:
            run.hive_create_warehouse_dir(r)
            run.hive_copy_shared_conf(
                r, edp.get_hive_shared_conf_path('hadoop'))

            if c_helper.is_mysql_enable(cluster):
                if not oozie or hive_server.hostname() != oozie.hostname():
                    run.mysql_start(r, hive_server)

                run.hive_create_db(r)
                run.hive_metastore_start(r)
                LOG.info("Hive Metastore server at %s has been started",
                         hive_server.hostname())

    LOG.info('Cluster %s has been started successfully' % cluster.name)
    self._set_cluster_info(cluster)

def await_datanodes(cluster):
    datanodes_count = len(u.get_datanodes(cluster))
    if datanodes_count < 1:
        return

    LOG.info("Waiting for %s datanodes to start up" % datanodes_count)
    with u.get_namenode(cluster).remote() as r:
        while True:
            if _check_datanodes_count(r, datanodes_count):
                LOG.info('Datanodes on cluster %s have been started' %
                         cluster.name)
                return

            context.sleep(1)

            if not g.check_cluster_exists(cluster):
                LOG.info('Stopped waiting for datanodes on cluster %s since '
                         'it has been deleted' % cluster.name)
                return

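# Note: _check_datanodes_count() is used above but not shown. A minimal
# sketch, assuming the number of live datanodes can be taken from the same
# "hdfs dfsadmin -report" output used elsewhere in this code (the grep
# expression and the raise_when_error flag on execute_command are
# assumptions):
def _check_datanodes_count(remote, count):
    if count < 1:
        return True

    # Count datanodes that report a normal (non-decommissioned) status.
    exit_code, stdout = remote.execute_command(
        'sudo su - -c "hdfs dfsadmin -report" hadoop | '
        'grep -c "Decommission Status : Normal"',
        raise_when_error=False)
    return exit_code == 0 and stdout and int(stdout) == count
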
def decommission_nodes(self, cluster, instances):
    tts = utils.get_tasktrackers(cluster)
    dns = utils.get_datanodes(cluster)
    decommission_dns = False
    decommission_tts = False

    for i in instances:
        if 'datanode' in i.node_group.node_processes:
            dns.remove(i)
            decommission_dns = True
        if 'tasktracker' in i.node_group.node_processes:
            tts.remove(i)
            decommission_tts = True

    nn = utils.get_namenode(cluster)
    jt = utils.get_jobtracker(cluster)

    if decommission_tts:
        sc.decommission_tt(jt, instances, tts)
    if decommission_dns:
        sc.decommission_dn(nn, instances, dns)

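# Note: sc.decommission_dn() (and its TaskTracker counterpart) is not shown
# in this section. A rough sketch of the DataNode variant, assuming
# decommissioning works by listing the instances to remove in the HDFS
# exclude file referenced by 'dfs.hosts.exclude' and then refreshing the
# namenode; the file path and the remote helpers follow the conventions used
# elsewhere in this code and are assumptions, and waiting for the
# decommission to finish is omitted:
def decommission_dn(nn, instances_to_delete, survived_datanodes):
    with remote.get_remote(nn) as r:
        r.write_files_to({
            '/etc/hadoop/dn.excl': '\n'.join(
                i.fqdn() for i in instances_to_delete),
        })
        run.refresh_nodes(r, "dfsadmin")
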
def _set_cluster_info(self, cluster):
    nn = utils.get_namenode(cluster)
    jt = utils.get_jobtracker(cluster)
    oozie = utils.get_oozie(cluster)
    info = {}

    if jt:
        ui_port = c_helper.get_port_from_config(
            'MapReduce', 'mapred.job.tracker.http.address', cluster)
        jt_port = c_helper.get_port_from_config(
            'MapReduce', 'mapred.job.tracker', cluster)

        info['MapReduce'] = {
            'Web UI': 'http://%s:%s' % (jt.management_ip, ui_port),
            'JobTracker': '%s:%s' % (jt.hostname(), jt_port)
        }
    if nn:
        ui_port = c_helper.get_port_from_config('HDFS', 'dfs.http.address',
                                                cluster)
        nn_port = c_helper.get_port_from_config('HDFS', 'fs.default.name',
                                                cluster)

        info['HDFS'] = {
            'Web UI': 'http://%s:%s' % (nn.management_ip, ui_port),
            'NameNode': 'hdfs://%s:%s' % (nn.hostname(), nn_port)
        }

    if oozie:
        # TODO(yrunts) change from hardcoded value
        info['JobFlow'] = {
            'Oozie': 'http://%s:11000' % oozie.management_ip
        }

    ctx = context.ctx()
    conductor.cluster_update(ctx, cluster, {'info': info})

def refresh_hadoop_nodes(cluster):
    nn = u.get_namenode(cluster)
    nn.remote().execute_command(
        'sudo su - -c "hdfs dfsadmin -refreshNodes" hadoop')