def _provision_cluster(self, name, cluster_spec, ambari_info,
                       servers, version):
    # TODO(jspeidel): encapsulate in another class
    if servers:
        cpo.add_provisioning_step(
            servers[0].cluster_id,
            _("Provision cluster via Ambari"), len(servers))

        with context.ThreadGroup() as tg:
            for server in servers:
                with context.set_current_instance_id(
                        server.instance['instance_id']):
                    tg.spawn(
                        "hdp-provision-instance-%s" %
                        server.instance.hostname(),
                        server.provision_ambari, ambari_info, cluster_spec)

    handler = self.version_factory.get_version_handler(version)
    ambari_client = handler.get_ambari_client()
    ambari_client.wait_for_host_registrations(len(servers), ambari_info)
    self._set_ambari_credentials(cluster_spec, ambari_info, version)

    ambari_client.provision_cluster(
        cluster_spec, servers, ambari_info, name)

    LOG.info(_LI('Cluster provisioned via Ambari Server: {server_ip}')
             .format(server_ip=ambari_info.get_address()))
def mount_to_instances(instances):
    if len(instances) == 0:
        return

    use_xfs = _can_use_xfs(instances)

    for instance in instances:
        with context.set_current_instance_id(instance.instance_id):
            devices = _find_instance_devices(instance)

            if devices:
                cpo.add_provisioning_step(
                    instance.cluster_id,
                    _("Mount volumes to {inst_name} instance").format(
                        inst_name=instance.instance_name), len(devices))

                formatted_devices = []
                lock = threading.Lock()
                with context.ThreadGroup() as tg:
                    # Since formatting can take several minutes (for large
                    # disks) and can be done in parallel, launch one thread
                    # per disk.
                    for device in devices:
                        tg.spawn('format-device-%s' % device, _format_device,
                                 instance, device, use_xfs, formatted_devices,
                                 lock)

                conductor.instance_update(
                    context.current(), instance,
                    {"storage_devices_number": len(formatted_devices)})
                for idx, dev in enumerate(formatted_devices):
                    _mount_volume_to_node(instance, idx + 1, dev, use_xfs)
def _add_hosts_and_components(
        self, cluster_spec, servers, ambari_info, name):

    add_host_url = 'http://{0}/api/v1/clusters/{1}/hosts/{2}'
    add_host_component_url = ('http://{0}/api/v1/clusters/{1}'
                              '/hosts/{2}/host_components/{3}')
    for host in servers:
        with context.set_current_instance_id(host.instance['instance_id']):
            hostname = host.instance.fqdn().lower()
            result = self._post(
                add_host_url.format(ambari_info.get_address(), name,
                                    hostname), ambari_info)
            if result.status_code != 201:
                LOG.error(
                    _LE('Create host command failed. {result}').format(
                        result=result.text))
                raise ex.HadoopProvisionError(
                    _('Failed to add host: %s') % result.text)

            node_group_name = host.node_group.name
            # TODO(jspeidel): ensure that node group exists
            node_group = cluster_spec.node_groups[node_group_name]
            for component in node_group.components:
                # don't add any AMBARI components
                if component.find('AMBARI') != 0:
                    result = self._post(add_host_component_url.format(
                        ambari_info.get_address(), name, hostname,
                        component), ambari_info)
                    if result.status_code != 201:
                        LOG.error(
                            _LE('Create host_component command failed. '
                                '{result}').format(result=result.text))
                        raise ex.HadoopProvisionError(
                            _('Failed to add host component: %s')
                            % result.text)
def start_oozie_process(pctx, instance, backup=None):
    with context.set_current_instance_id(instance.instance_id):
        with instance.remote() as r:
            if c_helper.is_mysql_enabled(pctx, instance.cluster):
                _start_mysql(r)
                if backup is None:
                    LOG.debug("Creating Oozie DB Schema")
                    sql_script = files.get_file_text(
                        'plugins/sandbox/hadoop2/resources/create_oozie_db.sql'
                    )
                    password = oozie_helper.get_oozie_mysql_configs(
                        instance.cluster
                    )['oozie.service.JPAService.jdbc.password']
                    sql_script = sql_script.replace("password", password)

                    script_location = "create_oozie_db.sql"
                    r.write_file_to(script_location, sql_script)
                    r.execute_command('mysql -u root < %(script_location)s && '
                                      'rm %(script_location)s' %
                                      {"script_location": script_location})

            if backup is None:
                _oozie_share_lib(r)
            _start_oozie(r)
def _await_networks(self, cluster, instances):
    if not instances:
        return

    cpo.add_provisioning_step(cluster.id, _("Assign IPs"), len(instances))

    ips_assigned = set()
    self._ips_assign(ips_assigned, cluster, instances)

    LOG.info("All instances have IPs assigned")

    cluster = conductor.cluster_get(context.ctx(), cluster)
    instances = cluster_utils.get_instances(cluster, ips_assigned)

    cpo.add_provisioning_step(
        cluster.id, _("Wait for instance accessibility"), len(instances))

    with context.ThreadGroup() as tg:
        for instance in instances:
            with context.set_current_instance_id(instance.instance_id):
                tg.spawn("wait-for-ssh-%s" % instance.instance_name,
                         self._wait_until_accessible, instance)

    LOG.info("All instances are accessible")
def start_hiveserver_process(pctx, instance, backup=None):
    with context.set_current_instance_id(instance.instance_id):
        with instance.remote() as r:
            if backup is None:
                _hive_create_warehouse_dir(r)
            _hive_copy_shared_conf(
                r, edp.get_hive_shared_conf_path('hadoop'), backup)

            if c_helper.is_mysql_enabled(pctx, instance.cluster):
                oozie = vu.get_oozie(instance.node_group.cluster)
                if not oozie or instance.hostname() != oozie.hostname():
                    _start_mysql(r)

                if backup is None:
                    sql_script = files.get_file_text(
                        'plugins/sandbox/hadoop2/resources/create_hive_db.sql')
                    sql_script = sql_script.replace(
                        '{{password}}', u.get_hive_password(instance.cluster))
                    r.write_file_to('/tmp/create_hive_db.sql', sql_script)
                    _hive_create_db(r)

                _hive_metastore_start(r)
                _hive_hiveserver2_start(r)
                LOG.info(
                    _LI("Hive Metastore server at {host} has been "
                        "started").format(host=instance.hostname()))
def _assign_floating_ips(self, instances):
    for instance in instances:
        with context.set_current_instance_id(instance.instance_id):
            node_group = instance.node_group
            if node_group.floating_ip_pool:
                networks.assign_floating_ip(instance.instance_id,
                                            node_group.floating_ip_pool)
def start_spark_history_server(master):
    sp_home = c_helper.get_spark_home(master.cluster)
    with context.set_current_instance_id(master.instance_id):
        with master.remote() as r:
            r.execute_command('sudo su - -c "bash %s" hadoop' % os.path.join(
                sp_home, 'sbin/start-history-server.sh'))
def start_spark(master):
    sp_home = c_helper.get_spark_home(master.cluster)
    with context.set_current_instance_id(master.instance_id):
        with master.remote() as r:
            r.execute_command('sudo su - -c "bash %s" hadoop' % os.path.join(
                sp_home, 'sbin/start-all.sh'))
            LOG.info("Spark service has been started")
def _scale_cluster_instances(self, cluster, node_group_id_map):
    ctx = context.ctx()

    aa_group = None
    old_aa_groups = None
    if cluster.anti_affinity:
        aa_group = self._find_aa_server_group(cluster)
        if not aa_group:
            old_aa_groups = self._generate_anti_affinity_groups(cluster)

    instances_to_delete = []
    node_groups_to_enlarge = set()
    node_groups_to_delete = set()

    for node_group in cluster.node_groups:
        new_count = node_group_id_map[node_group.id]

        if new_count < node_group.count:
            instances_to_delete += node_group.instances[new_count:
                                                        node_group.count]
            if new_count == 0:
                node_groups_to_delete.add(node_group.id)
        elif new_count > node_group.count:
            node_groups_to_enlarge.add(node_group.id)
            if node_group.count == 0 and node_group.auto_security_group:
                self._create_auto_security_group(node_group)

    if instances_to_delete:
        cluster = c_u.change_cluster_status(
            cluster, c_u.CLUSTER_STATUS_DELETING_INSTANCES)

        for instance in instances_to_delete:
            with context.set_current_instance_id(instance.instance_id):
                self._shutdown_instance(instance)

        self._await_deleted(cluster, instances_to_delete)
        for ng in cluster.node_groups:
            if ng.id in node_groups_to_delete:
                self._delete_auto_security_group(ng)

    cluster = conductor.cluster_get(ctx, cluster)
    instances_to_add = []
    if node_groups_to_enlarge:
        cpo.add_provisioning_step(
            cluster.id, _("Add instances"),
            self._count_instances_to_scale(
                node_groups_to_enlarge, node_group_id_map, cluster))

        cluster = c_u.change_cluster_status(
            cluster, c_u.CLUSTER_STATUS_ADDING_INSTANCES)
        for ng in cluster.node_groups:
            if ng.id in node_groups_to_enlarge:
                count = node_group_id_map[ng.id]
                for idx in six.moves.xrange(ng.count + 1, count + 1):
                    instance_id = self._start_instance(
                        cluster, ng, idx, aa_group, old_aa_groups)
                    instances_to_add.append(instance_id)

    return instances_to_add
def _await_networks(self, cluster, instances):
    if not instances:
        return

    cpo.add_provisioning_step(cluster.id, _("Assign IPs"), len(instances))

    ips_assigned = set()
    self._ips_assign(ips_assigned, cluster, instances)

    LOG.info(_LI("All instances have IPs assigned"))

    cluster = conductor.cluster_get(context.ctx(), cluster)
    instances = g.get_instances(cluster, ips_assigned)

    cpo.add_provisioning_step(
        cluster.id, _("Wait for instance accessibility"), len(instances))

    with context.ThreadGroup() as tg:
        for instance in instances:
            with context.set_current_instance_id(instance.instance_id):
                tg.spawn("wait-for-ssh-%s" % instance.instance_name,
                         self._wait_until_accessible, instance)

    LOG.info(_LI("All instances are accessible"))
def _disable_repos_on_inst(instance):
    with context.set_current_instance_id(instance_id=instance.instance_id):
        with instance.remote() as r:
            tmp_name = "/tmp/yum.repos.d-%s" % instance.instance_id[:8]
            sudo = functools.partial(r.execute_command, run_as_root=True)
            # move the repo files to another folder
            sudo("mv /etc/yum.repos.d/ {fold_name}".format(
                fold_name=tmp_name))
            sudo("mkdir /etc/yum.repos.d")
def _shutdown_instances(self, cluster):
    for node_group in cluster.node_groups:
        for instance in node_group.instances:
            with context.set_current_instance_id(instance.instance_id):
                self._shutdown_instance(instance)

        self._await_deleted(cluster, node_group.instances)
        self._delete_auto_security_group(node_group)
def _rollback_cluster_scaling(self, cluster, instances, ex):
    """Attempt to rollback cluster scaling."""
    for i in instances:
        with context.set_current_instance_id(i.instance_id):
            self._shutdown_instance(i)

    cluster = conductor.cluster_get(context.ctx(), cluster)
    c_u.clean_cluster_from_empty_ng(cluster)
def _rollback_cluster_scaling(self, cluster, instances, ex):
    """Attempt to rollback cluster scaling."""
    for i in instances:
        with context.set_current_instance_id(i.instance_id):
            self._shutdown_instance(i)

    cluster = conductor.cluster_get(context.ctx(), cluster)
    g.clean_cluster_from_empty_ng(cluster)
def configure_instances(pctx, instances):
    if len(instances) == 0:
        return

    cpo.add_provisioning_step(
        instances[0].cluster_id, _("Configure instances"), len(instances))

    for instance in instances:
        with context.set_current_instance_id(instance.instance_id):
            _configure_instance(pctx, instance)
def _check_active(self, active_ids, cluster, instances):
    if not c_u.check_cluster_exists(cluster):
        return True
    for instance in instances:
        if instance.id not in active_ids:
            with context.set_current_instance_id(instance.instance_id):
                if self._check_if_active(instance):
                    active_ids.add(instance.id)
                    cpo.add_successful_event(instance)
    return len(instances) == len(active_ids)
def install_swift_integration(self, servers):
    if servers:
        cpo.add_provisioning_step(
            servers[0].cluster_id, _("Install Swift integration"),
            len(servers))

    for server in servers:
        with context.set_current_instance_id(
                server.instance['instance_id']):
            server.install_swift_integration()
def _ips_assign(self, ips_assigned, cluster, instances):
    if not cluster_utils.check_cluster_exists(cluster):
        return True
    for instance in instances:
        if instance.id not in ips_assigned:
            with context.set_current_instance_id(instance.instance_id):
                if networks.init_instances_ips(instance):
                    ips_assigned.add(instance.id)
                    cpo.add_successful_event(instance)
    return len(ips_assigned) == len(instances)
def start_zk_server(instances):
    cpo.add_provisioning_step(
        instances[0].cluster_id,
        pu.start_process_event_message("ZooKeeper"),
        len(instances))
    with context.ThreadGroup() as tg:
        for instance in instances:
            with context.set_current_instance_id(instance.instance_id):
                tg.spawn('ZK-start-processes-%s' % instance.instance_name,
                         _start_zk_processes, instance, 'start')
def _check_active(self, active_ids, cluster, instances):
    if not g.check_cluster_exists(cluster):
        return True
    for instance in instances:
        if instance.id not in active_ids:
            with context.set_current_instance_id(instance.instance_id):
                if self._check_if_active(instance):
                    active_ids.add(instance.id)
                    cpo.add_successful_event(instance)
    return len(instances) == len(active_ids)
def _start_oozie(self, cluster, oozie):
    nn_instance = vu.get_namenode(cluster)

    with remote.get_remote(oozie) as r:
        with context.set_current_instance_id(oozie.instance_id):
            if c_helper.is_mysql_enable(cluster):
                run.mysql_start(r)
                run.oozie_create_db(r)
            run.oozie_share_lib(r, nn_instance.hostname())
            run.start_oozie(r)
            LOG.info(_LI("Oozie service has been started"))
def _check_deleted(self, deleted_ids, cluster, instances):
    if not cluster_utils.check_cluster_exists(cluster):
        return True

    for instance in instances:
        if instance.id not in deleted_ids:
            with context.set_current_instance_id(instance.instance_id):
                if self._check_if_deleted(instance):
                    LOG.debug("Instance is deleted")
                    deleted_ids.add(instance.id)
                    cpo.add_successful_event(instance)
    return len(deleted_ids) == len(instances)
def _check_deleted(self, deleted_ids, cluster, instances):
    if not g.check_cluster_exists(cluster):
        return True

    for instance in instances:
        if instance.id not in deleted_ids:
            with context.set_current_instance_id(instance.instance_id):
                if self._check_if_deleted(instance):
                    LOG.debug("Instance is deleted")
                    deleted_ids.add(instance.id)
                    cpo.add_successful_event(instance)
    return len(deleted_ids) == len(instances)
def _disable_repos_on_inst(instance):
    with context.set_current_instance_id(instance_id=instance.instance_id):
        with instance.remote() as r:
            sudo = functools.partial(r.execute_command, run_as_root=True)
            if r.get_os_distrib() == "ubuntu":
                sudo("mv /etc/apt/sources.list /etc/apt/sources.list.tmp")
            else:
                tmp_name = "/tmp/yum.repos.d-%s" % instance.instance_id[:8]
                # move the repo files to another folder
                sudo("mv /etc/yum.repos.d/ {fold_name}".format(
                    fold_name=tmp_name))
                sudo("mkdir /etc/yum.repos.d")
def _start_hiveserver(self, cluster, hive_server):
    oozie = vu.get_oozie(cluster)

    with remote.get_remote(hive_server) as r:
        with context.set_current_instance_id(hive_server.instance_id):
            run.hive_create_warehouse_dir(r)
            run.hive_copy_shared_conf(
                r, edp.get_hive_shared_conf_path("hadoop"))

            if c_helper.is_mysql_enable(cluster):
                if not oozie or hive_server.hostname() != oozie.hostname():
                    run.mysql_start(r)
                run.hive_create_db(r, cluster.extra["hive_mysql_passwd"])
                run.hive_metastore_start(r)
                LOG.info(_LI("Hive Metastore server has been started"))
def _configure_ntp_on_instance(instance, url):
    with context.set_current_instance_id(instance.instance_id):
        LOG.debug("Configuring ntp server")
        with instance.remote() as r:
            if not _check_ntp_installed(r):
                # missing ntp service
                LOG.warning(_LW("Unable to configure NTP service"))
                return

            r.append_to_file("/etc/ntp.conf", "server {url}".format(url=url),
                             run_as_root=True)
            _restart_ntp(r)
            _sudo(r, "ntpdate -u {url}".format(url=url))
            LOG.info(_LI("NTP successfully configured"))
def refresh_zk_servers(cluster, to_delete_instances=None):
    instances = vu.get_zk_servers(cluster)
    if to_delete_instances:
        for instance in to_delete_instances:
            if instance in instances:
                instances.remove(instance)

    cpo.add_provisioning_step(
        cluster.id,
        pu.start_process_event_message("ZooKeeper"),
        len(instances))
    with context.ThreadGroup() as tg:
        for instance in instances:
            with context.set_current_instance_id(instance.instance_id):
                tg.spawn('ZK-restart-processes-%s' % instance.instance_name,
                         _start_zk_processes, instance, 'restart')
def start_oozie_process(pctx, instance):
    with context.set_current_instance_id(instance.instance_id):
        with instance.remote() as r:
            if c_helper.is_mysql_enabled(pctx, instance.cluster):
                _start_mysql(r)
                LOG.debug("Creating Oozie DB Schema")
                sql_script = files.get_file_text(
                    'plugins/vanilla/hadoop2/resources/create_oozie_db.sql')

                script_location = "create_oozie_db.sql"
                r.write_file_to(script_location, sql_script)
                r.execute_command('mysql -u root < %(script_location)s && '
                                  'rm %(script_location)s' %
                                  {"script_location": script_location})

            _oozie_share_lib(r)
            _start_oozie(r)
def _provision_key(instance, keypair):
    def append_to(remote, file, *args, **kwargs):
        kwargs['run_as_root'] = True
        path = "/home/hadoop/.ssh/%s" % file
        remote.append_to_file(path, *args, **kwargs)

    public, private = keypair['public'], keypair['private']
    folder = '/home/hadoop/.ssh'
    with context.set_current_instance_id(instance_id=instance.instance_id):
        with instance.remote() as r:
            r.execute_command('sudo mkdir -p %s' % folder)
            append_to(r, 'authorized_keys', public)
            append_to(r, 'id_rsa', private)
            append_to(r, 'id_rsa.pub', public)
            r.execute_command('sudo chown -R hadoop %s' % folder)
            r.execute_command("sudo chmod 600 %s/id_rsa" % folder)
        LOG.debug("Passwordless ssh enabled")
def attach_to_instances(instances):
    instances_to_attach = _count_instances_to_attach(instances)
    if instances_to_attach == 0:
        return

    cpo.add_provisioning_step(
        instances[0].cluster_id, _("Attach volumes to instances"),
        instances_to_attach)

    with context.ThreadGroup() as tg:
        for instance in instances:
            if instance.node_group.volumes_per_node > 0:
                with context.set_current_instance_id(instance.instance_id):
                    tg.spawn(
                        'attach-volumes-for-instance-%s'
                        % instance.instance_name,
                        _attach_volumes_to_node, instance.node_group,
                        instance)
def start_hiveserver_process(pctx, instance):
    with context.set_current_instance_id(instance.instance_id):
        with instance.remote() as r:
            _hive_create_warehouse_dir(r)
            _hive_copy_shared_conf(
                r, edp.get_hive_shared_conf_path("hadoop"))

            if c_helper.is_mysql_enabled(pctx, instance.cluster):
                oozie = vu.get_oozie(instance.node_group.cluster)
                if not oozie or instance.hostname() != oozie.hostname():
                    _start_mysql(r)

                sql_script = files.get_file_text(
                    "plugins/vanilla/hadoop2/resources/create_hive_db.sql")
                r.write_file_to("/tmp/create_hive_db.sql", sql_script)
                _hive_create_db(r)
                _hive_metastore_start(r)
                LOG.info(_LI("Hive Metastore server at {host} has been "
                             "started").format(host=instance.hostname()))
def _configure_ntp_on_instance(instance, url):
    with context.set_current_instance_id(instance.instance_id):
        LOG.debug("Configuring ntp server")
        with instance.remote() as r:
            if not _check_ntp_installed(r):
                # missing ntp service
                LOG.warning("Unable to configure NTP service")
                return

            r.prepend_to_file(
                "/etc/ntp.conf", "server {url} iburst\n".format(url=url),
                run_as_root=True)
            _restart_ntp(r)
            try:
                _sudo(r, "ntpdate -u {url}".format(url=url))
            except Exception as e:
                LOG.debug("Update time on VM failed with error: %s", e)
            LOG.info("NTP successfully configured")
def _configure_instances(self, cluster):
    """Configure active instances.

    * generate /etc/hosts
    * setup passwordless login
    * etc.
    """
    hosts_file = g.generate_etc_hosts(cluster)
    cpo.add_provisioning_step(
        cluster.id, _("Configure instances"), g.count_instances(cluster))

    with context.ThreadGroup() as tg:
        for node_group in cluster.node_groups:
            for instance in node_group.instances:
                with context.set_current_instance_id(instance.instance_id):
                    tg.spawn(
                        "configure-instance-%s" % instance.instance_name,
                        self._configure_instance, instance, hosts_file)
def mount_to_instances(instances):
    if len(instances) == 0:
        return

    cpo.add_provisioning_step(
        instances[0].cluster_id,
        _("Mount volumes to instances"),
        _count_volumes_to_mount(instances))

    with context.ThreadGroup() as tg:
        for instance in instances:
            with context.set_current_instance_id(instance.instance_id):
                devices = _find_instance_volume_devices(instance)

                # Since formatting can take several minutes (for large disks)
                # and can be done in parallel, launch one thread per disk.
                for idx in range(0, instance.node_group.volumes_per_node):
                    tg.spawn(
                        'mount-volume-%d-to-node-%s' %
                        (idx, instance.instance_name),
                        _mount_volume_to_node, instance, idx, devices[idx])
def start_dn_nm_processes(instances):
    filternames = ['datanode', 'nodemanager']
    instances = pu.instances_with_services(instances, filternames)

    if len(instances) == 0:
        return

    cpo.add_provisioning_step(
        instances[0].cluster_id,
        pu.start_process_event_message("DataNodes, NodeManagers"),
        len(instances))

    with context.ThreadGroup() as tg:
        for instance in instances:
            with context.set_current_instance_id(instance.instance_id):
                processes = set(instance.node_group.node_processes)
                processes = processes.intersection(filternames)
                tg.spawn('vanilla-start-processes-%s'
                         % instance.instance_name,
                         _start_processes, instance, list(processes))
def _configure_instances(self, cluster):
    """Configure active instances.

    * generate /etc/hosts
    * change /etc/resolv.conf
    * setup passwordless login
    * etc.
    """
    cpo.add_provisioning_step(
        cluster.id, _("Configure instances"),
        cluster_utils.count_instances(cluster))

    with context.ThreadGroup() as tg:
        for node_group in cluster.node_groups:
            for instance in node_group.instances:
                with context.set_current_instance_id(instance.instance_id):
                    tg.spawn(
                        "configure-instance-{}".format(
                            instance.instance_name),
                        self._configure_instance, instance, cluster)
def scale_cluster(self, cluster, instances):
    handler = self.version_factory.get_version_handler(
        cluster.hadoop_version)
    ambari_client = handler.get_ambari_client()
    cluster_spec = handler.get_cluster_spec(
        cluster, self._map_to_user_inputs(
            cluster.hadoop_version, cluster.cluster_configs))
    rpm = self._get_rpm_uri(cluster_spec)

    servers = []
    for instance in instances:
        host_role = utils.get_host_role(instance)
        servers.append(
            h.HadoopServer(instance, cluster_spec.node_groups[host_role],
                           ambari_rpm=rpm))

    ambari_info = self.get_ambari_info(cluster_spec)
    self._update_ambari_info_credentials(cluster_spec, ambari_info)

    cpo.add_provisioning_step(
        cluster.id, _("Provision cluster via Ambari"), len(servers))

    with context.ThreadGroup() as tg:
        for server in servers:
            with context.set_current_instance_id(
                    server.instance['instance_id']):
                tg.spawn('Ambari provisioning thread',
                         server.provision_ambari, ambari_info, cluster_spec)

    ambari_client.configure_scaled_cluster_instances(
        cluster.name, cluster_spec, self._get_num_hosts(cluster),
        ambari_info)
    self._configure_topology_for_cluster(cluster, servers)
    ambari_client.start_scaled_cluster_instances(cluster.name,
                                                 cluster_spec, servers,
                                                 ambari_info)

    ambari_client.cleanup(ambari_info)
def start_oozie_process(pctx, instance):
    with context.set_current_instance_id(instance.instance_id):
        with instance.remote() as r:
            if c_helper.is_mysql_enabled(pctx, instance.cluster):
                _start_mysql(r)
                LOG.debug("Creating Oozie DB Schema")
                sql_script = files.get_file_text(
                    'plugins/vanilla/hadoop2/resources/create_oozie_db.sql')

                password = oozie_helper.get_oozie_mysql_configs(
                    instance.cluster)[
                    'oozie.service.JPAService.jdbc.password']
                sql_script = sql_script.replace("password", password)

                script_location = "create_oozie_db.sql"
                r.write_file_to(script_location, sql_script)
                r.execute_command('mysql -u root < %(script_location)s && '
                                  'rm %(script_location)s' %
                                  {"script_location": script_location})

            _oozie_share_lib(r)
            _start_oozie(r)
def _log_command(self, str):
    with context.set_current_instance_id(self.instance.instance_id):
        LOG.debug(str)
def mount_shares_to_node_group(self):
    """Mounts all configured shares to the node group."""
    for instance in self.node_group.instances:
        with context.set_current_instance_id(instance.instance_id):
            self._mount_shares_to_instance(instance)
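# The functions above all share one pattern: per-instance work is wrapped in
# context.set_current_instance_id() so that log records and provisioning
# events emitted inside the block (or in threads spawned from it) are
# attributed to that instance. The minimal sketch below illustrates the
# pattern only; apply_to_instances() and do_work() are hypothetical names and
# are not part of the codebase shown above.
def apply_to_instances(instances, do_work):
    with context.ThreadGroup() as tg:
        for instance in instances:
            with context.set_current_instance_id(instance.instance_id):
                # one thread per instance; the instance id set above is what
                # the snippets rely on to tag the spawned work in logs/events
                tg.spawn("work-on-%s" % instance.instance_name,
                         do_work, instance)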