def check_swift_availability(self, cluster_info): plugin_config = cluster_info['plugin_config'] # Make unique name of Swift container during Swift testing swift_container_name = 'Swift-test-' + str(uuid.uuid4())[:8] extra_script_parameters = { 'OS_TENANT_NAME': self.common_config.OS_TENANT_NAME, 'OS_USERNAME': self.common_config.OS_USERNAME, 'OS_PASSWORD': self.common_config.OS_PASSWORD, 'HADOOP_USER': plugin_config.HADOOP_USER, 'SWIFT_CONTAINER_NAME': swift_container_name } namenode_ip = cluster_info['node_info']['namenode_ip'] self.open_ssh_connection(namenode_ip, plugin_config.SSH_USERNAME) try: self.transfer_helper_script_to_node( 'swift_test_script.sh', parameter_list=extra_script_parameters) except Exception as e: with excutils.save_and_reraise_exception(): print(str(e)) swift = self.connect_to_swift() swift.put_container(swift_container_name) try: self.execute_command('./script.sh') except Exception as e: with excutils.save_and_reraise_exception(): print(str(e)) finally: self.delete_swift_container(swift, swift_container_name) self.close_ssh_connection()
def check_swift_availability(self, cluster_info): plugin_config = cluster_info['plugin_config'] # Make unique name of Swift container during Swift testing swift_container_name = 'Swift-test-' + str(uuid.uuid4())[:8] extra_script_parameters = { 'OS_TENANT_NAME': self.common_config.OS_TENANT_NAME, 'OS_USERNAME': self.common_config.OS_USERNAME, 'OS_PASSWORD': self.common_config.OS_PASSWORD, 'HADOOP_USER': plugin_config.HADOOP_USER, 'SWIFT_CONTAINER_NAME': swift_container_name } namenode_ip = cluster_info['node_info']['namenode_ip'] self.open_ssh_connection(namenode_ip, plugin_config.SSH_USERNAME) try: self.transfer_helper_script_to_node( 'swift_test_script.sh', parameter_list=extra_script_parameters ) except Exception as e: with excutils.save_and_reraise_exception(): print(str(e)) swift = self.connect_to_swift() swift.put_container(swift_container_name) try: self.execute_command('./script.sh') except Exception as e: with excutils.save_and_reraise_exception(): print(str(e)) finally: self.delete_swift_container(swift, swift_container_name) self.close_ssh_connection()
def create_cluster(values):
    """Create a cluster record, validate it, and kick off provisioning.

    Persists the cluster, fills in node-group image usernames, runs the
    plugin's validation (moving the cluster to "Validating"), then spawns
    the provisioning greenthread and optionally creates a Keystone trust
    for transient clusters.

    :param values: dict of cluster attributes for conductor.cluster_create.
    :returns: the freshly re-read cluster object.
    :raises: validation errors are re-raised after the cluster is marked
        "Error" with the failure text.
    """
    ctx = context.ctx()
    cluster = conductor.cluster_create(ctx, values)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)

    # update nodegroup image usernames
    for nodegroup in cluster.node_groups:
        conductor.node_group_update(
            ctx, nodegroup,
            {"image_username": INFRA.get_node_group_image_username(nodegroup)})
    cluster = conductor.cluster_get(ctx, cluster)

    # validating cluster
    try:
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Validating"})
        LOG.info(g.format_cluster_status(cluster))
        plugin.validate(cluster)
    except Exception as e:
        # Record the failure on the cluster before re-raising.
        with excutils.save_and_reraise_exception():
            cluster = conductor.cluster_update(ctx, cluster, {
                "status": "Error",
                "status_description": str(e)
            })
            LOG.info(g.format_cluster_status(cluster))

    context.spawn("cluster-creating-%s" % cluster.id,
                  _provision_cluster, cluster.id)
    if CONF.use_identity_api_v3 and cluster.is_transient:
        trusts.create_trust(cluster)

    return conductor.cluster_get(ctx, cluster.id)
def try_get_image_id_and_ssh_username(parameter, value):
    """Return (image id, SSH username) taken from the image metadata.

    NOTE(review): ``image`` and ``imgs`` are not defined in this block —
    presumably closed over from an enclosing scope; confirm at the
    definition site.

    :param parameter: config parameter name, used only for error reporting.
    :param value: config parameter value, used only for error reporting.
    :raises KeyError: re-raised (after logging) when the image has no
        username property.
    """
    try:
        return image.id, image.metadata[imgs.PROP_USERNAME]
    except KeyError:
        # Log which config parameter led us to this image, then re-raise.
        with excutils.save_and_reraise_exception():
            print_error_log(parameter, value)
def cluster_config_testing(self, cluster_info):
    """Verify that cluster configs were applied via API and on the nodes.

    Compares general/HDFS/MapReduce configs reported by the Sahara API
    against the expected values, checks node-group configs, then ships
    ``cluster_config_test_script.sh`` to every node and verifies config
    application there.

    :param cluster_info: dict with a 'cluster_id' entry.
    :raises: script-transfer failures are printed and re-raised.
    """
    cluster_id = cluster_info['cluster_id']
    data = self.sahara.clusters.get(cluster_id)
    self._compare_configs(
        {'Enable Swift': True}, data.cluster_configs['general']
    )
    self._compare_configs(
        CLUSTER_HDFS_CONFIG, data.cluster_configs['HDFS']
    )
    self._compare_configs(
        CLUSTER_MR_CONFIG, data.cluster_configs['MapReduce']
    )
    node_groups = data.node_groups
    self._check_configs_for_node_groups(node_groups)
    node_ip_list_with_node_processes = (
        self.get_cluster_node_ip_list_with_node_processes(cluster_id))
    try:
        self.transfer_helper_script_to_nodes(
            node_ip_list_with_node_processes,
            self.vanilla_config.SSH_USERNAME,
            'cluster_config_test_script.sh'
        )
    except Exception as e:
        with excutils.save_and_reraise_exception():
            print(str(e))
    self._check_config_application_on_cluster_nodes(
        node_ip_list_with_node_processes
    )
def get_cluster_info(self, plugin_config):
    """Collect node IPs/processes and node info for the current cluster.

    :param plugin_config: plugin configuration group used for the node
        process deployment check.
    :returns: dict with 'cluster_id', 'node_ip_list', 'node_info' and
        'plugin_config' keys (see example below).
    :raises: node-info check failures are printed and re-raised.
    """
    node_ip_list_with_node_processes = self.get_cluster_node_ip_list_with_node_processes(self.cluster_id)
    try:
        node_info = self.get_node_info(node_ip_list_with_node_processes,
                                       plugin_config)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            print("\nFailure during check of node process deployment "
                  "on cluster node: " + str(e))
    # For example: method "create_cluster_and_get_info" return
    # {
    #   'node_info': {
    #       'tasktracker_count': 3,
    #       'node_count': 6,
    #       'namenode_ip': '172.18.168.242',
    #       'datanode_count': 3
    #   },
    #   'cluster_id': 'bee5c6a1-411a-4e88-95fc-d1fbdff2bb9d',
    #   'node_ip_list': {
    #       '172.18.168.153': ['tasktracker', 'datanode'],
    #       '172.18.168.208': ['secondarynamenode', 'oozie'],
    #       '172.18.168.93': ['tasktracker'],
    #       '172.18.168.101': ['tasktracker', 'datanode'],
    #       '172.18.168.242': ['namenode', 'jobtracker'],
    #       '172.18.168.167': ['datanode']
    #   },
    #   'plugin_config': <oslo.config.cfg.GroupAttr object at 0x215d9d>
    # }
    return {
        "cluster_id": self.cluster_id,
        "node_ip_list": node_ip_list_with_node_processes,
        "node_info": node_info,
        "plugin_config": plugin_config,
    }
def create_cluster(values):
    """Persist, validate, and asynchronously provision a new cluster.

    Stores the cluster, updates node-group image usernames, validates via
    the plugin (status "Validating"), spawns the provisioning thread, and
    creates a trust for transient clusters when Keystone v3 is enabled.

    :param values: cluster attribute dict for conductor.cluster_create.
    :returns: re-fetched cluster object.
    :raises: validation failures are re-raised after the cluster status
        is set to "Error".
    """
    ctx = context.ctx()
    cluster = conductor.cluster_create(ctx, values)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)

    # update nodegroup image usernames
    for nodegroup in cluster.node_groups:
        conductor.node_group_update(
            ctx, nodegroup,
            {"image_username": INFRA.get_node_group_image_username(nodegroup)})
    cluster = conductor.cluster_get(ctx, cluster)

    # validating cluster
    try:
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Validating"})
        LOG.info(g.format_cluster_status(cluster))
        plugin.validate(cluster)
    except Exception as e:
        # Mark the cluster "Error" with the reason before re-raising.
        with excutils.save_and_reraise_exception():
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Error",
                                                "status_description": str(e)})
            LOG.info(g.format_cluster_status(cluster))

    context.spawn("cluster-creating-%s" % cluster.id,
                  _provision_cluster, cluster.id)
    if CONF.use_identity_api_v3 and cluster.is_transient:
        trusts.create_trust(cluster)

    return conductor.cluster_get(ctx, cluster.id)
def _add_params_to_script_and_transfer_to_node(self, cluster_info,
                                               node_group,
                                               node_with_volumes=False):
    """Parameterize the MapReduce test script and push it to each node.

    Builds script parameters from the plugin config (choosing the
    on-volume Hadoop log directory when ``node_with_volumes`` is True)
    and transfers ``map_reduce_test_script.sh`` to every instance in the
    node group over SSH.

    :param cluster_info: dict with 'plugin_config' and 'node_info'.
    :param node_group: dict with an 'instances' list.
    :param node_with_volumes: use HADOOP_LOG_DIRECTORY_ON_VOLUME if True.
    :raises: per-instance SSH/transfer failures are printed and re-raised.
    """
    plugin_config = cluster_info['plugin_config']
    hadoop_log_directory = plugin_config.HADOOP_LOG_DIRECTORY
    if node_with_volumes:
        hadoop_log_directory = (
            plugin_config.HADOOP_LOG_DIRECTORY_ON_VOLUME)
    extra_script_parameters = {
        'HADOOP_VERSION': plugin_config.HADOOP_VERSION,
        'HADOOP_DIRECTORY': plugin_config.HADOOP_DIRECTORY,
        'HADOOP_EXAMPLES_JAR_PATH': plugin_config.HADOOP_EXAMPLES_JAR_PATH,
        'HADOOP_LOG_DIRECTORY': hadoop_log_directory,
        'HADOOP_USER': plugin_config.HADOOP_USER,
        'NODE_COUNT': cluster_info['node_info']['node_count'],
        'PLUGIN_NAME': plugin_config.PLUGIN_NAME
    }
    for instance in node_group['instances']:
        try:
            self.open_ssh_connection(
                instance['management_ip'], plugin_config.SSH_USERNAME)
            self.transfer_helper_script_to_node(
                'map_reduce_test_script.sh', extra_script_parameters
            )
            self.close_ssh_connection()
        except Exception as e:
            with excutils.save_and_reraise_exception():
                print(str(e))
def create_cluster(self, cluster):
    """Launch all cluster instances (and volumes) via the launcher.

    Captures the target node-group counts, zeroes them, launches the
    instances, then adds volumes. On failure the cluster is marked
    "Error" and creation is rolled back — unless the cluster was deleted
    concurrently, in which case only a message is logged.

    :param cluster: cluster object/id accepted by conductor.cluster_get.
    :raises: the original launch failure is always re-raised.
    """
    ctx = context.ctx()
    launcher = _CreateLauncher()
    try:
        target_count = self._get_ng_counts(cluster)
        self._nullify_ng_counts(cluster)
        cluster = conductor.cluster_get(ctx, cluster)
        launcher.launch_instances(ctx, cluster, target_count)
        cluster = conductor.cluster_get(ctx, cluster)
        self._add_volumes(ctx, cluster)
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            # If the cluster vanished mid-operation there is nothing
            # to roll back; just log and let the exception propagate.
            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return
            self._log_operation_exception(
                _LW("Can't start cluster '%(cluster)s' "
                    "(reason: %(reason)s)"), cluster, ex)
            cluster = g.change_cluster_status(
                cluster, "Error", status_description=six.text_type(ex))
            self._rollback_cluster_creation(cluster)
def create_cluster(self, cluster):
    """Launch cluster instances through the create-launcher.

    On failure: if the cluster still exists it is set to "Error" and the
    partially-created resources are rolled back; if it was deleted
    concurrently only a message is logged. The failure is re-raised
    either way.

    :param cluster: cluster object/id accepted by conductor.cluster_get.
    """
    ctx = context.ctx()
    launcher = _CreateLauncher()
    try:
        target_count = self._get_ng_counts(cluster)
        self._nullify_ng_counts(cluster)
        cluster = conductor.cluster_get(ctx, cluster)
        launcher.launch_instances(ctx, cluster, target_count)
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return
            self._log_operation_exception(
                "Can't start cluster '%s' (reason: %s)", cluster, ex)
            cluster = conductor.cluster_update(
                ctx, cluster, {"status": "Error",
                               "status_description": str(ex)})
            LOG.info(g.format_cluster_status(cluster))
            self._rollback_cluster_creation(cluster)
def scale_cluster(self, cluster, node_group_id_map):
    """Add instances to a running cluster per the node-group map.

    Creates the new instances, waits for them to become active, assigns
    floating IPs, waits for networking, and attaches volumes. If the
    cluster is deleted while waiting, returns [] early. On error the
    scaling is rolled back, new instances removed, and the cluster set
    back to "Active" before the exception is re-raised.

    :param cluster: cluster object/id.
    :param node_group_id_map: {node_group_id: desired_instance_count}.
    :returns: list of newly created instance ids ([] if cluster deleted).
    """
    ctx = context.ctx()
    instance_ids = []
    try:
        instance_ids = self._scale_cluster_instances(cluster,
                                                     node_group_id_map)
        cluster = conductor.cluster_get(ctx, cluster)
        g.clean_cluster_from_empty_ng(cluster)
        cluster = conductor.cluster_get(ctx, cluster)
        instances = g.get_instances(cluster, instance_ids)
        self._await_active(cluster, instances)
        if not g.check_cluster_exists(cluster):
            LOG.info(g.format_cluster_deleted_message(cluster))
            return []
        self._assign_floating_ips(instances)
        self._await_networks(cluster, instances)
        if not g.check_cluster_exists(cluster):
            LOG.info(g.format_cluster_deleted_message(cluster))
            return []
        cluster = conductor.cluster_get(ctx, cluster)
        volumes.attach_to_instances(
            g.get_instances(cluster, instance_ids))
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            # A concurrently-deleted cluster needs no rollback.
            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return []
            self._log_operation_exception(
                "Can't scale cluster '%s' (reason: %s)", cluster, ex)
            cluster = conductor.cluster_get(ctx, cluster)
            self._rollback_cluster_scaling(
                cluster, g.get_instances(cluster, instance_ids), ex)
            instance_ids = []
            cluster = conductor.cluster_get(ctx, cluster)
            g.clean_cluster_from_empty_ng(cluster)
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Active"})
            LOG.info(g.format_cluster_status(cluster))

    # we should be here with valid cluster: if instances creation
    # was not successful all extra-instances will be removed above
    if instance_ids:
        self._configure_instances(cluster)
    return instance_ids
def __enter__(self):
    """Enter the context: take the remote-call semaphore and build a
    bulk interop helper for the wrapped instance.

    The semaphore is released again if helper construction fails, and
    the construction error is re-raised unchanged.
    """
    _acquire_remote_semaphore()
    try:
        helper = BulkInstanceInteropHelper(self.instance)
    except Exception:
        # Construction failed: give the semaphore back, then re-raise.
        with excutils.save_and_reraise_exception():
            _release_remote_semaphore()
    else:
        self.bulk = helper
        return helper
def try_telnet(self, host, port):
    """Probe ``host:port`` with a telnet connection.

    Any connection failure is reported (including how long the caller
    has been waiting, per TELNET_TIMEOUT) and then re-raised unchanged.
    """
    try:
        telnetlib.Telnet(host, port)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            details = (
                ' NODE IP: %s, PORT: %s. Passed %s minute(s).'
                % (host, port, self.common_config.TELNET_TIMEOUT))
            print('\nTelnet has failed: ' + str(e) + details)
def __init__(self, instance):
    """Start a dedicated subprocess and connect it to the instance.

    If establishing the connection in the subprocess fails, the
    subprocess is shut down (with cleanup) and the error is re-raised.

    :param instance: instance whose connection parameters are used.
    """
    super(BulkInstanceInteropHelper, self).__init__(instance)
    self.proc = procutils.start_subprocess()
    try:
        procutils.run_in_subprocess(self.proc, _connect,
                                    self._get_conn_params())
    except Exception:
        # Don't leak the subprocess when the connect step fails.
        with excutils.save_and_reraise_exception():
            procutils.shutdown_subprocess(self.proc, _cleanup)
def _run_wordcount_job(self): try: self.execute_command('./script.sh run_wordcount_job') except Exception as e: with excutils.save_and_reraise_exception(): print('\nFailure while \'Wordcount\' job launch: ' + str(e)) self.capture_error_log_from_cluster_node( '/tmp/MapReduceTestOutput/log.txt' )
def _compare_configs_on_cluster_node(self, config, value):
    """Check one config/value pair on the node via the helper script.

    Spaces are stripped from the config name before it is passed as a
    script argument. On failure the config-test log is captured from the
    node and the exception re-raised.

    :param config: config parameter name (spaces removed before use).
    :param value: expected value passed to the script's ``-value`` flag.
    """
    config = config.replace(' ', '')
    try:
        self.execute_command('./script.sh %s -value %s' % (config, value))
    except Exception as e:
        with excutils.save_and_reraise_exception():
            print('\nFailure while config comparison on cluster node: '
                  + str(e))
            self.capture_error_log_from_cluster_node(
                '/tmp/config-test-log.txt')
def create_cluster(self, cluster):
    """Provision a cluster: spawn, wait, network, volumes, prepare.

    Drives the cluster through "Spawning" → "Waiting" → "Preparing",
    bailing out quietly if the cluster is deleted while waiting. On
    error the cluster is marked "Error" and creation rolled back before
    the exception is re-raised.

    :param cluster: cluster object/id.
    """
    ctx = context.ctx()
    try:
        # create all instances
        conductor.cluster_update(ctx, cluster, {"status": "Spawning"})
        LOG.info(g.format_cluster_status(cluster))
        self._create_instances(cluster)

        # wait for all instances are up and networks ready
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Waiting"})
        LOG.info(g.format_cluster_status(cluster))
        instances = g.get_instances(cluster)
        self._await_active(cluster, instances)
        if not g.check_cluster_exists(cluster):
            LOG.info(g.format_cluster_deleted_message(cluster))
            return
        self._assign_floating_ips(instances)
        self._await_networks(cluster, instances)
        if not g.check_cluster_exists(cluster):
            LOG.info(g.format_cluster_deleted_message(cluster))
            return
        cluster = conductor.cluster_get(ctx, cluster)

        # attach volumes
        volumes.attach(cluster)

        # prepare all instances
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Preparing"})
        LOG.info(g.format_cluster_status(cluster))
        self._configure_instances(cluster)
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            # No rollback needed when the cluster was already deleted.
            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return
            self._log_operation_exception(
                "Can't start cluster '%s' (reason: %s)", cluster, ex)
            cluster = conductor.cluster_update(
                ctx, cluster, {"status": "Error",
                               "status_description": str(ex)})
            LOG.info(g.format_cluster_status(cluster))
            self._rollback_cluster_creation(cluster, ex)
def try_telnet(self, host, port):
    """Open a telnet connection to host:port; report and re-raise failures.

    :param host: node IP to probe.
    :param port: TCP port to probe.
    :raises: the original telnet failure, after printing how many
        minutes have passed (per common_config.TELNET_TIMEOUT).
    """
    try:
        telnetlib.Telnet(host, port)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            print(
                '\nTelnet has failed: ' + str(e) +
                ' NODE IP: %s, PORT: %s. Passed %s minute(s).'
                % (host, port, self.common_config.TELNET_TIMEOUT)
            )
def try_get_image_id_and_ssh_username(parameter, value):
    """Return (image id, SSH username), preferring the configured username.

    Uses ``plugin_config.SSH_USERNAME`` when set; otherwise falls back to
    the image's username metadata property.
    NOTE(review): ``image``, ``imgs`` and ``plugin_config`` are not
    defined in this block — presumably closed over from an enclosing
    scope; confirm at the definition site.

    :param parameter: config parameter name, for error reporting only.
    :param value: config parameter value, for error reporting only.
    :raises KeyError: re-raised (after logging) when the image lacks the
        username property.
    """
    try:
        if not plugin_config.SSH_USERNAME:
            return image.id, image.metadata[imgs.PROP_USERNAME]
        else:
            return image.id, plugin_config.SSH_USERNAME
    except KeyError:
        with excutils.save_and_reraise_exception():
            print_error_log(parameter, value)
def _run(self, func, *args, **kwargs):
    """Run ``func(*args, **kwargs)`` inside a fresh connected subprocess.

    Starts a subprocess, establishes the remote connection in it, runs
    the function there, and always shuts the subprocess down afterwards.

    Fix: the original shut the subprocess down twice on failure — once
    in an ``except`` handler and again in ``finally``. A single
    ``finally`` covers both the success and failure paths.

    :param func: callable to execute in the subprocess.
    :returns: whatever procutils.run_in_subprocess returns for ``func``.
    :raises: any connection or execution error, after cleanup.
    """
    proc = procutils.start_subprocess()
    try:
        procutils.run_in_subprocess(proc, _connect, self._get_conn_params())
        return procutils.run_in_subprocess(proc, func, args, kwargs)
    finally:
        # Sole cleanup point — runs on success and on any exception.
        procutils.shutdown_subprocess(proc, _cleanup)
def transfer_helper_script_to_node(self, script_name, parameter_list=None):
    """Fill in a helper script's parameters and upload it to the node.

    Reads the named script from the integration-test resources,
    substitutes each ``PARAM=""`` placeholder with ``PARAM=value``,
    writes it to the node as ``script.sh`` and makes it executable.

    Fix: the script file is now opened with a ``with`` block so the file
    handle is closed promptly (the original leaked it via
    ``open(...).read()``).

    :param script_name: resource file name under tests/resources.
    :param parameter_list: optional {PARAM: value} substitutions.
    :raises: upload failures are printed and re-raised.
    """
    with open("sahara/tests/integration/tests/resources/%s"
              % script_name) as script_file:
        script = script_file.read()
    if parameter_list:
        for parameter, value in parameter_list.items():
            script = script.replace('%s=""' % parameter,
                                    "%s=%s" % (parameter, value))
    try:
        self.write_file_to("script.sh", script)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            print("\nFailure while helper script transferring "
                  "to cluster node: " + str(e))
    self.execute_command("chmod 777 script.sh")
def remove_path_on_error(path, remove=delete_if_exists):
    """Protect code that wants to operate on PATH atomically.

    Any exception raised in the guarded block causes PATH to be removed
    (via ``remove``) and is then re-raised unchanged.
    NOTE(review): this is a generator — presumably decorated with
    ``@contextlib.contextmanager`` at the definition site; confirm.

    :param path: File to work with
    :param remove: Optional function to remove passed path
    """
    try:
        yield
    except Exception:
        with excutils.save_and_reraise_exception():
            remove(path)
def scale_cluster(self, cluster, node_group_id_map):
    """Scale a cluster's node groups to the requested instance counts.

    Creates the new instances, waits for activation and networking, and
    attaches volumes. On failure everything added is rolled back and the
    cluster status restored ("Error" if it was "Decommissioning",
    otherwise "Active") before the exception is re-raised.

    :param cluster: cluster object/id.
    :param node_group_id_map: {node_group_id: desired_instance_count}.
    :returns: ids of the newly created instances.
    """
    ctx = context.ctx()
    instance_ids = []
    try:
        instance_ids = self._scale_cluster_instances(cluster,
                                                     node_group_id_map)
        cluster = conductor.cluster_get(ctx, cluster)
        g.clean_cluster_from_empty_ng(cluster)
        cluster = conductor.cluster_get(ctx, cluster)
        instances = g.get_instances(cluster, instance_ids)
        self._await_active(cluster, instances)
        self._assign_floating_ips(instances)
        self._await_networks(cluster, instances)
        cluster = conductor.cluster_get(ctx, cluster)
        volumes.attach_to_instances(
            g.get_instances(cluster, instance_ids))
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            self._log_operation_exception(
                "Can't scale cluster '%s' (reason: %s)", cluster, ex)
            cluster = conductor.cluster_get(ctx, cluster)
            self._rollback_cluster_scaling(
                cluster, g.get_instances(cluster, instance_ids), ex)
            instance_ids = []
            cluster = conductor.cluster_get(ctx, cluster)
            g.clean_cluster_from_empty_ng(cluster)
            # Preserve a decommissioning failure as "Error"; otherwise
            # the cluster is back to its pre-scaling "Active" state.
            if cluster.status == 'Decommissioning':
                cluster = conductor.cluster_update(ctx, cluster,
                                                   {"status": "Error"})
            else:
                cluster = conductor.cluster_update(ctx, cluster,
                                                   {"status": "Active"})
            LOG.info(g.format_cluster_status(cluster))

    # we should be here with valid cluster: if instances creation
    # was not successful all extra-instances will be removed above
    if instance_ids:
        self._configure_instances(cluster)
    return instance_ids
def get_floating_ip_pool_id_for_neutron_net(self):
    """Return the ID of the configured floating IP pool network.

    Looks the pool up by name via the Neutron API and returns the first
    match's id. NOTE(review): raising a new Exception inside
    ``save_and_reraise_exception`` causes oslo to log/drop the original
    IndexError — confirm this substitution is intended.

    :returns: Neutron network id of the floating IP pool.
    :raises Exception: descriptive error when the pool name matches no
        network.
    """
    # Find corresponding floating IP pool by its name and get its ID.
    # If pool not found then handle error
    try:
        floating_ip_pool = self.neutron.list_networks(name=self.common_config.FLOATING_IP_POOL)
        floating_ip_pool_id = floating_ip_pool["networks"][0]["id"]
        return floating_ip_pool_id
    except IndexError:
        with excutils.save_and_reraise_exception():
            raise Exception(
                "\nFloating IP pool '%s' not found in pool list. "
                "Please, make sure you specified right floating IP pool."
                % self.common_config.FLOATING_IP_POOL
            )
def _compare_configs_on_cluster_node(self, config, value):
    """Verify a single config value on the node through the test script.

    The config name has spaces removed before being passed to the
    script. Failures capture the node-side log and are re-raised.

    :param config: config parameter name.
    :param value: expected value, passed via ``-value``.
    """
    config = config.replace(' ', '')
    try:
        self.execute_command('./script.sh %s -value %s' % (config, value))
    except Exception as e:
        with excutils.save_and_reraise_exception():
            print(
                '\nFailure while config comparison on cluster node: '
                + str(e)
            )
            self.capture_error_log_from_cluster_node(
                '/tmp/config-test-log.txt'
            )
def _get_name_of_completed_pi_job(self):
    """Fetch the name of the completed PI job from the cluster node.

    Runs ``./script.sh get_pi_job_name`` and returns the second line of
    the command output with its trailing character (newline) stripped.
    On failure the job log is captured and the exception re-raised.

    :returns: the job name string.
    """
    try:
        job_name = self.execute_command('./script.sh get_pi_job_name')
    except Exception as e:
        with excutils.save_and_reraise_exception():
            print(
                '\nFailure while name obtaining completed \'PI\' job: '
                + str(e)
            )
            self.capture_error_log_from_cluster_node(
                '/tmp/MapReduceTestOutput/log.txt'
            )
    # execute_command returns (status, output); drop trailing newline.
    return job_name[1][:-1]
def scale_cluster(id, data):
    """Validate a scaling request and spawn the scaling operation.

    Builds the {node_group_id: desired_count} map from the resize and
    add requests, updates image usernames for added groups, validates
    via the plugin ("Validating" status), then spawns the provisioning
    greenthread. On validation failure empty node groups are cleaned up
    and the cluster returned to "Active" before re-raising.

    NOTE(review): parameter ``id`` shadows the builtin — kept for
    interface compatibility.

    :param id: cluster id.
    :param data: request dict with optional 'resize_node_groups' and
        'add_node_groups' lists.
    :returns: re-fetched cluster object.
    """
    ctx = context.ctx()
    cluster = conductor.cluster_get(ctx, id)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    existing_node_groups = data.get('resize_node_groups', [])
    additional_node_groups = data.get('add_node_groups', [])

    # the next map is the main object we will work with
    # to_be_enlarged : {node_group_id: desired_amount_of_instances}
    to_be_enlarged = {}
    for ng in existing_node_groups:
        ng_id = g.find(cluster.node_groups, name=ng['name'])['id']
        to_be_enlarged.update({ng_id: ng['count']})

    additional = construct_ngs_for_scaling(cluster, additional_node_groups)
    cluster = conductor.cluster_get(ctx, cluster)

    # update nodegroup image usernames
    for nodegroup in cluster.node_groups:
        if additional.get(nodegroup.id):
            image_username = INFRA.get_node_group_image_username(nodegroup)
            conductor.node_group_update(ctx, nodegroup,
                                        {"image_username": image_username})
    cluster = conductor.cluster_get(ctx, cluster)

    try:
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Validating"})
        LOG.info(g.format_cluster_status(cluster))
        plugin.validate_scaling(cluster, to_be_enlarged, additional)
    except Exception:
        with excutils.save_and_reraise_exception():
            g.clean_cluster_from_empty_ng(cluster)
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Active"})
            LOG.info(g.format_cluster_status(cluster))

    # If we are here validation is successful.
    # So let's update to_be_enlarged map:
    to_be_enlarged.update(additional)

    # Groups not mentioned in the request keep their current size.
    for node_group in cluster.node_groups:
        if node_group.id not in to_be_enlarged:
            to_be_enlarged[node_group.id] = node_group.count

    context.spawn("cluster-scaling-%s" % id,
                  _provision_scaled_cluster, id, to_be_enlarged)
    return conductor.cluster_get(ctx, id)
def scale_cluster(id, data):
    """Validate and launch a cluster scaling request.

    Assembles the desired node-group sizes from the resize/add lists,
    records image usernames for the added groups, runs plugin scaling
    validation, and spawns the provisioning thread. A validation failure
    cleans up empty node groups and restores "Active" before re-raising.

    NOTE(review): parameter ``id`` shadows the builtin — kept as-is for
    caller compatibility.

    :param id: cluster id.
    :param data: dict with 'resize_node_groups' / 'add_node_groups'.
    :returns: re-fetched cluster object.
    """
    ctx = context.ctx()
    cluster = conductor.cluster_get(ctx, id)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    existing_node_groups = data.get('resize_node_groups', [])
    additional_node_groups = data.get('add_node_groups', [])

    # the next map is the main object we will work with
    # to_be_enlarged : {node_group_id: desired_amount_of_instances}
    to_be_enlarged = {}
    for ng in existing_node_groups:
        ng_id = g.find(cluster.node_groups, name=ng['name'])['id']
        to_be_enlarged.update({ng_id: ng['count']})

    additional = construct_ngs_for_scaling(cluster, additional_node_groups)
    cluster = conductor.cluster_get(ctx, cluster)

    # update nodegroup image usernames
    for nodegroup in cluster.node_groups:
        if additional.get(nodegroup.id):
            image_username = INFRA.get_node_group_image_username(nodegroup)
            conductor.node_group_update(
                ctx, nodegroup, {"image_username": image_username})
    cluster = conductor.cluster_get(ctx, cluster)

    try:
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Validating"})
        LOG.info(g.format_cluster_status(cluster))
        plugin.validate_scaling(cluster, to_be_enlarged, additional)
    except Exception:
        with excutils.save_and_reraise_exception():
            g.clean_cluster_from_empty_ng(cluster)
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Active"})
            LOG.info(g.format_cluster_status(cluster))

    # If we are here validation is successful.
    # So let's update to_be_enlarged map:
    to_be_enlarged.update(additional)

    # Unmentioned node groups keep their current instance count.
    for node_group in cluster.node_groups:
        if node_group.id not in to_be_enlarged:
            to_be_enlarged[node_group.id] = node_group.count

    context.spawn("cluster-scaling-%s" % id,
                  _provision_scaled_cluster, id, to_be_enlarged)
    return conductor.cluster_get(ctx, id)
def get_internal_neutron_net_id(self):
    """Return the ID of the configured internal Neutron network.

    Looks the network up by name and returns the first match's id.
    NOTE(review): raising a new Exception inside
    ``save_and_reraise_exception`` makes oslo log/drop the original
    IndexError — confirm this is the intended reporting style.

    :returns: Neutron network id.
    :raises Exception: descriptive error when no network matches.
    """
    # Find corresponding internal Neutron network by its name and get
    # its ID. If network not found then handle error
    try:
        internal_neutron_net = self.neutron.list_networks(
            name=self.common_config.INTERNAL_NEUTRON_NETWORK)
        internal_neutron_net_id = internal_neutron_net['networks'][0]['id']
        return internal_neutron_net_id
    except IndexError:
        with excutils.save_and_reraise_exception():
            raise Exception(
                '\nInternal Neutron network \'%s\' not found in network '
                'list. Please, make sure you specified right network name.'
                % self.common_config.INTERNAL_NEUTRON_NETWORK)
def get_floating_ip_pool_id_for_neutron_net(self):
    """Return the Neutron network id of the configured floating IP pool.

    NOTE(review): raising inside ``save_and_reraise_exception`` makes
    oslo log/drop the original IndexError in favor of the new
    Exception — confirm intended.

    :returns: floating IP pool network id.
    :raises Exception: descriptive error when the pool is not found.
    """
    # Find corresponding floating IP pool by its name and get its ID.
    # If pool not found then handle error
    try:
        floating_ip_pool = self.neutron.list_networks(
            name=self.common_config.FLOATING_IP_POOL)
        floating_ip_pool_id = floating_ip_pool['networks'][0]['id']
        return floating_ip_pool_id
    except IndexError:
        with excutils.save_and_reraise_exception():
            raise Exception(
                '\nFloating IP pool \'%s\' not found in pool list. '
                'Please, make sure you specified right floating IP pool.'
                % self.common_config.FLOATING_IP_POOL)
def transfer_helper_script_to_node(self, script_name, parameter_list=None):
    """Parameterize a helper script and install it on the node.

    Loads the script from the integration-test resources, replaces each
    ``PARAM=""`` placeholder with ``PARAM=value``, uploads it to the
    node as ``script.sh`` and marks it executable.

    Fix: the resource file is opened in a ``with`` block so the handle
    is closed deterministically (the original leaked it via
    ``open(...).read()``).

    :param script_name: resource file name under tests/resources.
    :param parameter_list: optional {PARAM: value} substitutions.
    :raises: upload failures are printed and re-raised.
    """
    with open('sahara/tests/integration/tests/resources/%s'
              % script_name) as script_file:
        script = script_file.read()
    if parameter_list:
        for parameter, value in parameter_list.items():
            script = script.replace('%s=""' % parameter,
                                    '%s=%s' % (parameter, value))
    try:
        self.write_file_to('script.sh', script)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            print('\nFailure while helper script transferring '
                  'to cluster node: ' + str(e))
    self.execute_command('chmod 777 script.sh')
def get_internal_neutron_net_id(self):
    """Return the ID of the internal Neutron network named in the config.

    NOTE(review): raising a fresh Exception inside
    ``save_and_reraise_exception`` leads oslo to log/drop the original
    IndexError — confirm this substitution is intended.

    :returns: Neutron network id.
    :raises Exception: descriptive error when the name matches nothing.
    """
    # Find corresponding internal Neutron network by its name and get
    # its ID. If network not found then handle error
    try:
        internal_neutron_net = self.neutron.list_networks(name=self.common_config.INTERNAL_NEUTRON_NETWORK)
        internal_neutron_net_id = internal_neutron_net["networks"][0]["id"]
        return internal_neutron_net_id
    except IndexError:
        with excutils.save_and_reraise_exception():
            raise Exception(
                "\nInternal Neutron network '%s' not found in network "
                "list. Please, make sure you specified right network name."
                % self.common_config.INTERNAL_NEUTRON_NETWORK
            )
def create_cluster(values):
    """Create and validate a cluster record, then hand off provisioning.

    The cluster is moved to "Validating" for the plugin check; a failed
    validation marks it "Error" with the failure text and re-raises.
    Provisioning itself is delegated to OPS.

    :param values: cluster attribute dict for conductor.cluster_create.
    :returns: the created cluster object.
    """
    request_ctx = context.ctx()
    cluster = conductor.cluster_create(request_ctx, values)
    cluster_plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)

    # validating cluster
    try:
        cluster = g.change_cluster_status(cluster, "Validating")
        cluster_plugin.validate(cluster)
    except Exception as validation_error:
        with excutils.save_and_reraise_exception():
            g.change_cluster_status(
                cluster, "Error",
                status_description=six.text_type(validation_error))

    OPS.provision_cluster(cluster.id)
    return cluster
def cluster_scaling(self, cluster_info, change_list):
    """Apply resize/add changes to a cluster and verify the result.

    Builds the scale request body from ``change_list``, submits it,
    waits for the cluster to settle, then compares the resulting node
    info with the expectation accumulated in ``cluster_info``.

    :param cluster_info: dict with 'cluster_id', 'node_info' and
        'plugin_config'.
    :param change_list: list of dicts with 'operation' ('resize'/'add')
        and an 'info' tuple (name, size[, id]).
    :returns: updated cluster_info-style dict.
    :raises AssertionError: when post-scaling node info differs.
    """
    scale_body = {'add_node_groups': [], 'resize_node_groups': []}
    for change in change_list:
        if change['operation'] == 'resize':
            node_group_name = change['info'][0]
            node_group_size = change['info'][1]
            self._add_new_field_to_scale_body_while_ng_resizing(
                scale_body, node_group_name, node_group_size)
            self._change_node_info_while_ng_resizing(
                node_group_name, node_group_size, cluster_info)
        if change['operation'] == 'add':
            node_group_name = change['info'][0]
            node_group_size = change['info'][1]
            node_group_id = change['info'][2]
            self._add_new_field_to_scale_body_while_ng_adding(
                scale_body, node_group_id, node_group_size, node_group_name)
            self._change_node_info_while_ng_adding(node_group_id,
                                                   node_group_size,
                                                   cluster_info)
    self.sahara.clusters.scale(cluster_info['cluster_id'], scale_body)
    self.poll_cluster_state(cluster_info['cluster_id'])
    new_node_ip_list = self.get_cluster_node_ip_list_with_node_processes(
        cluster_info['cluster_id'])
    try:
        new_node_info = self.get_node_info(new_node_ip_list,
                                           cluster_info['plugin_config'])
    except Exception as e:
        with excutils.save_and_reraise_exception():
            print('\nFailure during check of node process deployment '
                  'on cluster node: ' + str(e))
    expected_node_info = cluster_info['node_info']
    self.assertEqual(
        expected_node_info, new_node_info,
        'Failure while node info comparison.\n'
        'Expected node info after cluster scaling: %s.\n'
        'Actual node info after cluster scaling: %s.'
        % (expected_node_info, new_node_info))
    return {
        'cluster_id': cluster_info['cluster_id'],
        'node_ip_list': new_node_ip_list,
        'node_info': new_node_info,
        'plugin_config': cluster_info['plugin_config']
    }
def scale_cluster(self, cluster, target_count):
    """Scale the cluster to the target node-group counts (heat engine).

    On launch failure the previous counts are restored via rollback;
    a rollback failure marks the cluster "Error" and aborts. If the
    cluster is deleted mid-operation, only a message is logged. The
    original exception is re-raised in every failure branch; empty node
    groups are always cleaned up in ``finally``.

    :param cluster: cluster object/id.
    :param target_count: desired node-group counts for the launcher.
    :returns: launcher.inst_ids — ids of instances after scaling.
    """
    ctx = context.ctx()
    rollback_count = self._get_ng_counts(cluster)
    launcher = _ScaleLauncher()
    try:
        launcher.launch_instances(ctx, cluster, target_count)
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            if not g.check_cluster_exists(cluster):
                LOG.info(g.format_cluster_deleted_message(cluster))
                return
            self._log_operation_exception(
                "Can't scale cluster '%s' (reason: %s)", cluster, ex)
            cluster = conductor.cluster_get(ctx, cluster)
            try:
                self._rollback_cluster_scaling(
                    ctx, cluster, rollback_count, target_count)
            except Exception:
                if not g.check_cluster_exists(cluster):
                    LOG.info(g.format_cluster_deleted_message(cluster))
                    return
                # if something fails during the rollback, we stop
                # doing anything further
                cluster = conductor.cluster_update(ctx, cluster,
                                                   {"status": "Error"})
                LOG.info(g.format_cluster_status(cluster))
                LOG.error("Unable to complete rollback, aborting")
                raise
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Active"})
            LOG.info(g.format_cluster_status(cluster))
            LOG.warn(
                "Rollback successful. Throwing off an initial exception.")
    finally:
        cluster = conductor.cluster_get(ctx, cluster)
        g.clean_cluster_from_empty_ng(cluster)
    return launcher.inst_ids
def create_cluster(self, cluster):
    """Provision a new cluster: spawn instances, wait, attach, prepare.

    Moves the cluster through "Spawning" → "Waiting" → "Preparing".
    On any failure the cluster is marked "Error" with the reason,
    creation is rolled back, and the exception re-raised.

    :param cluster: cluster object/id.
    """
    ctx = context.ctx()
    try:
        # create all instances
        conductor.cluster_update(ctx, cluster, {"status": "Spawning"})
        LOG.info(g.format_cluster_status(cluster))
        self._create_instances(cluster)

        # wait for all instances are up and networks ready
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Waiting"})
        LOG.info(g.format_cluster_status(cluster))
        instances = g.get_instances(cluster)
        self._await_active(cluster, instances)
        self._assign_floating_ips(instances)
        self._await_networks(cluster, instances)
        cluster = conductor.cluster_get(ctx, cluster)

        # attach volumes
        volumes.attach(cluster)

        # prepare all instances
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Preparing"})
        LOG.info(g.format_cluster_status(cluster))
        self._configure_instances(cluster)
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            self._log_operation_exception(
                "Can't start cluster '%s' (reason: %s)", cluster, ex)
            cluster = conductor.cluster_update(
                ctx, cluster, {
                    "status": "Error",
                    "status_description": str(ex)
                })
            LOG.info(g.format_cluster_status(cluster))
            self._rollback_cluster_creation(cluster, ex)
def create_cluster(self, cluster):
    """Launch cluster instances with the create-launcher (heat engine).

    Node-group counts are captured then zeroed so the launcher grows the
    cluster to the target. On failure the cluster is marked "Error",
    creation rolled back, and the exception re-raised.

    :param cluster: cluster object/id.
    """
    ctx = context.ctx()
    launcher = _CreateLauncher()
    try:
        target_count = self._get_ng_counts(cluster)
        self._nullify_ng_counts(cluster)
        cluster = conductor.cluster_get(ctx, cluster)
        launcher.launch_instances(ctx, cluster, target_count)
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            self._log_operation_exception(
                "Can't start cluster '%s' (reason: %s)", cluster, ex)
            cluster = conductor.cluster_update(
                ctx, cluster, {"status": "Error",
                               "status_description": str(ex)})
            LOG.info(g.format_cluster_status(cluster))
            self._rollback_cluster_creation(cluster)
def cluster_scaling(self, cluster_info, change_list):
    """Submit a scaling request built from change_list and verify it.

    Each change is either a node-group 'resize' (name, size) or an 'add'
    (name, size, id); both update the expected node info as a side
    effect. After scaling, the actual node info is compared against the
    expectation.

    :param cluster_info: dict with 'cluster_id', 'node_info',
        'plugin_config'.
    :param change_list: list of {'operation': ..., 'info': (...)} dicts.
    :returns: updated cluster_info-style dict.
    :raises AssertionError: when node info after scaling differs.
    """
    scale_body = {"add_node_groups": [], "resize_node_groups": []}
    for change in change_list:
        if change["operation"] == "resize":
            node_group_name = change["info"][0]
            node_group_size = change["info"][1]
            self._add_new_field_to_scale_body_while_ng_resizing(scale_body,
                                                                node_group_name,
                                                                node_group_size)
            self._change_node_info_while_ng_resizing(node_group_name,
                                                     node_group_size,
                                                     cluster_info)
        if change["operation"] == "add":
            node_group_name = change["info"][0]
            node_group_size = change["info"][1]
            node_group_id = change["info"][2]
            self._add_new_field_to_scale_body_while_ng_adding(
                scale_body, node_group_id, node_group_size, node_group_name
            )
            self._change_node_info_while_ng_adding(node_group_id,
                                                   node_group_size,
                                                   cluster_info)
    self.sahara.clusters.scale(cluster_info["cluster_id"], scale_body)
    self.poll_cluster_state(cluster_info["cluster_id"])
    new_node_ip_list = self.get_cluster_node_ip_list_with_node_processes(cluster_info["cluster_id"])
    try:
        new_node_info = self.get_node_info(new_node_ip_list,
                                           cluster_info["plugin_config"])
    except Exception as e:
        with excutils.save_and_reraise_exception():
            print("\nFailure during check of node process deployment "
                  "on cluster node: " + str(e))
    expected_node_info = cluster_info["node_info"]
    self.assertEqual(
        expected_node_info, new_node_info,
        "Failure while node info comparison.\n"
        "Expected node info after cluster scaling: %s.\n"
        "Actual node info after cluster scaling: %s."
        % (expected_node_info, new_node_info),
    )
    return {
        "cluster_id": cluster_info["cluster_id"],
        "node_ip_list": new_node_ip_list,
        "node_info": new_node_info,
        "plugin_config": cluster_info["plugin_config"],
    }
def create_cluster(values):
    """Create a cluster record, validate it, and delegate provisioning.

    Sets status "Validating" for the plugin check; a failure marks the
    cluster "Error" with the reason and re-raises. Provisioning is then
    handed to OPS.

    :param values: cluster attribute dict for conductor.cluster_create.
    :returns: the created cluster object.
    """
    ctx = context.ctx()
    cluster = conductor.cluster_create(ctx, values)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)

    # validating cluster
    try:
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Validating"})
        LOG.info(g.format_cluster_status(cluster))
        plugin.validate(cluster)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Error",
                                                "status_description": str(e)})
            LOG.info(g.format_cluster_status(cluster))

    OPS.provision_cluster(cluster.id)
    return cluster
def scale_cluster(self, cluster, target_count):
    """Scale the cluster to target counts, rolling back on failure.

    A failed launch triggers rollback to the previous counts; if the
    rollback itself fails the cluster is marked "Error" and the rollback
    error propagates, otherwise the cluster returns to "Active" and the
    original exception is re-raised. Empty node groups are cleaned up
    in ``finally`` regardless of outcome.

    :param cluster: cluster object/id.
    :param target_count: desired node-group counts for the launcher.
    :returns: launcher.inst_ids — ids of instances after scaling.
    """
    ctx = context.ctx()
    rollback_count = self._get_ng_counts(cluster)
    launcher = _ScaleLauncher()
    try:
        launcher.launch_instances(ctx, cluster, target_count)
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            self._log_operation_exception(
                "Can't scale cluster '%s' (reason: %s)", cluster, ex)
            cluster = conductor.cluster_get(ctx, cluster)
            try:
                self._rollback_cluster_scaling(
                    ctx, cluster, rollback_count, target_count)
            except Exception:
                # if something fails during the rollback, we stop
                # doing anything further
                cluster = conductor.cluster_update(ctx, cluster,
                                                   {"status": "Error"})
                LOG.info(g.format_cluster_status(cluster))
                LOG.error("Unable to complete rollback, aborting")
                raise
            cluster = conductor.cluster_update(ctx, cluster,
                                               {"status": "Active"})
            LOG.info(g.format_cluster_status(cluster))
            LOG.warn(
                "Rollback successful. Throwing off an initial exception.")
    finally:
        cluster = conductor.cluster_get(ctx, cluster)
        g.clean_cluster_from_empty_ng(cluster)
    return launcher.inst_ids
def create_cluster(values):
    """Persist and validate a new cluster, then hand off to OPS.

    The cluster goes to "Validating" for the plugin check; a validation
    failure records "Error" plus the reason and re-raises.

    :param values: cluster attribute dict for conductor.cluster_create.
    :returns: the created cluster object.
    """
    ctx = context.ctx()
    cluster = conductor.cluster_create(ctx, values)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)

    # validating cluster
    try:
        cluster = conductor.cluster_update(ctx, cluster,
                                           {"status": "Validating"})
        LOG.info(g.format_cluster_status(cluster))
        plugin.validate(cluster)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            cluster = conductor.cluster_update(ctx, cluster, {
                "status": "Error",
                "status_description": str(e)
            })
            LOG.info(g.format_cluster_status(cluster))

    OPS.provision_cluster(cluster.id)
    return cluster
def get_cluster_info(self, plugin_config):
    """Collect a summary dict describing the current cluster.

    Fetches the node-IP -> node-process mapping for ``self.cluster_id``,
    derives per-node info from it, and bundles everything with the plugin
    config.  A failure while deriving node info is printed and re-raised.

    Example of the returned structure:
        {
            'node_info': {
                'tasktracker_count': 3,
                'node_count': 6,
                'namenode_ip': '172.18.168.242',
                'datanode_count': 3
            },
            'cluster_id': 'bee5c6a1-411a-4e88-95fc-d1fbdff2bb9d',
            'node_ip_list': {
                '172.18.168.153': ['tasktracker', 'datanode'],
                ...
            },
            'plugin_config': <oslo.config.cfg.GroupAttr object>
        }
    """
    ip_map = self.get_cluster_node_ip_list_with_node_processes(
        self.cluster_id)

    try:
        node_info = self.get_node_info(ip_map, plugin_config)
    except Exception as err:
        # Print the failure, then re-raise the original exception.
        with excutils.save_and_reraise_exception():
            print('\nFailure during check of node process deployment '
                  'on cluster node: ' + str(err))

    return dict(cluster_id=self.cluster_id,
                node_ip_list=ip_map,
                node_info=node_info,
                plugin_config=plugin_config)
def cluster_config_testing(self, cluster_info):
    """Verify that cluster and node-group configs were actually applied.

    Compares the expected 'general', 'HDFS' and 'MapReduce' configs with
    what the Sahara API reports, checks the node-group configs, then
    pushes a helper script to every node and verifies config application
    on the nodes themselves.
    """
    cluster_id = cluster_info['cluster_id']
    cluster_data = self.sahara.clusters.get(cluster_id)

    # Check each config section against the values the cluster reports.
    expected_sections = [
        ({'Enable Swift': True}, 'general'),
        (CLUSTER_HDFS_CONFIG, 'HDFS'),
        (CLUSTER_MR_CONFIG, 'MapReduce'),
    ]
    for expected, section in expected_sections:
        self._compare_configs(expected,
                              cluster_data.cluster_configs[section])

    self._check_configs_for_node_groups(cluster_data.node_groups)

    ip_list = self.get_cluster_node_ip_list_with_node_processes(cluster_id)
    try:
        self.transfer_helper_script_to_nodes(
            ip_list, self.vanilla_config.SSH_USERNAME,
            'cluster_config_test_script.sh')
    except Exception as err:
        # Print the failure, then re-raise the original exception.
        with excutils.save_and_reraise_exception():
            print(str(err))
    self._check_config_application_on_cluster_nodes(ip_list)
def scale_cluster(id, data):
    """Validate a scaling request and kick off scaled provisioning.

    :param id: cluster id (parameter name kept for caller compatibility
        even though it shadows the builtin ``id``)
    :param data: dict with optional 'resize_node_groups' and
        'add_node_groups' entries
    :returns: the cluster object after validation
    """
    ctx = context.ctx()
    cluster = conductor.cluster_get(ctx, id)
    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    existing_node_groups = data.get('resize_node_groups', [])
    additional_node_groups = data.get('add_node_groups', [])

    # the next map is the main object we will work with
    # to_be_enlarged : {node_group_id: desired_amount_of_instances}
    to_be_enlarged = {
        g.find(cluster.node_groups, name=ng['name'])['id']: ng['count']
        for ng in existing_node_groups
    }

    additional = construct_ngs_for_scaling(cluster, additional_node_groups)
    cluster = conductor.cluster_get(ctx, cluster)

    try:
        cluster = g.change_cluster_status(cluster, "Validating")
        plugin.validate_scaling(cluster, to_be_enlarged, additional)
    except Exception:
        # Restore the cluster to a clean "Active" state, then re-raise.
        with excutils.save_and_reraise_exception():
            g.clean_cluster_from_empty_ng(cluster)
            g.change_cluster_status(cluster, "Active")

    # Validation succeeded: merge in the newly added node groups and fill
    # in current counts for every node group not being resized.
    to_be_enlarged.update(additional)
    for node_group in cluster.node_groups:
        to_be_enlarged.setdefault(node_group.id, node_group.count)

    OPS.provision_scaled_cluster(id, to_be_enlarged)
    return cluster
def test_vanilla_plugin_gating(self):
    """End-to-end gating scenario for the Vanilla plugin.

    Creates node group and cluster templates, boots a cluster, runs the
    Cinder, cluster-config, EDP, Map Reduce and Swift checks, optionally
    scales the cluster and re-runs the checks, and finally deletes every
    object it created.  On any failure the objects created so far are
    cleaned up and the original exception is re-raised.

    Fix over the original: the EDP fixture files are now read with
    ``with open(...)`` so the file handles are closed deterministically
    instead of being leaked.
    """
    self.vanilla_config.IMAGE_ID, self.vanilla_config.SSH_USERNAME = (
        self.get_image_id_and_ssh_username(self.vanilla_config))

    # Default value of self.common_config.FLOATING_IP_POOL is None
    floating_ip_pool = self.common_config.FLOATING_IP_POOL
    internal_neutron_net = None
    # If Neutron enabled then get ID of floating IP pool and ID of internal
    # Neutron network
    if self.common_config.NEUTRON_ENABLED:
        floating_ip_pool = self.get_floating_ip_pool_id_for_neutron_net()
        internal_neutron_net = self.get_internal_neutron_net_id()

    # ----------------------TRANSIENT CLUSTER TESTING--------------------
    try:
        self.transient_cluster_testing(
            self.vanilla_config, floating_ip_pool, internal_neutron_net)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            message = 'Failure while transient cluster testing: '
            self.print_error_log(message, e)

    if self.vanilla_config.ONLY_TRANSIENT_CLUSTER_TEST:
        return

    # ---------------------------CLUSTER CREATION------------------------
    # ----------------"tt-dn" node group template creation---------------
    node_group_template_id_list = []
    try:
        node_group_template_tt_dn_id = self.create_node_group_template(
            name='test-node-group-template-vanilla-tt-dn',
            plugin_config=self.vanilla_config,
            description='test node group template for Vanilla plugin',
            node_processes=['tasktracker', 'datanode'],
            node_configs={
                'HDFS': cluster_configs.DN_CONFIG,
                'MapReduce': cluster_configs.TT_CONFIG
            },
            floating_ip_pool=floating_ip_pool)
        node_group_template_id_list.append(node_group_template_tt_dn_id)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            message = 'Failure while \'tt-dn\' node group ' \
                      'template creation: '
            self.print_error_log(message, e)

    # -----------------"tt" node group template creation-----------------
    if not self.vanilla_config.SKIP_CINDER_TEST:
        volumes_per_node = 2
        volume_size = 2
    else:
        volumes_per_node = 0
        volume_size = 0
    try:
        node_group_template_tt_id = self.create_node_group_template(
            name='test-node-group-template-vanilla-tt',
            plugin_config=self.vanilla_config,
            description='test node group template for Vanilla plugin',
            volumes_per_node=volumes_per_node,
            volume_size=volume_size,
            node_processes=['tasktracker'],
            node_configs={'MapReduce': cluster_configs.TT_CONFIG},
            floating_ip_pool=floating_ip_pool)
        node_group_template_id_list.append(node_group_template_tt_id)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                node_group_template_id_list=node_group_template_id_list)
            message = 'Failure while \'tt\' node group template creation: '
            self.print_error_log(message, e)

    # -----------------"dn" node group template creation-----------------
    try:
        node_group_template_dn_id = self.create_node_group_template(
            name='test-node-group-template-vanilla-dn',
            plugin_config=self.vanilla_config,
            description='test node group template for Vanilla plugin',
            volumes_per_node=volumes_per_node,
            volume_size=volume_size,
            node_processes=['datanode'],
            node_configs={'HDFS': cluster_configs.DN_CONFIG},
            floating_ip_pool=floating_ip_pool)
        node_group_template_id_list.append(node_group_template_dn_id)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                node_group_template_id_list=node_group_template_id_list)
            message = 'Failure while \'dn\' node group template creation: '
            self.print_error_log(message, e)

    # ----------------------Cluster template creation---------------------
    try:
        cluster_template_id = self.create_cluster_template(
            name='test-cluster-template-vanilla',
            plugin_config=self.vanilla_config,
            description='test cluster template for Vanilla plugin',
            cluster_configs={
                'HDFS': cluster_configs.CLUSTER_HDFS_CONFIG,
                'MapReduce': cluster_configs.CLUSTER_MR_CONFIG,
                'general': {'Enable Swift': True}
            },
            node_groups=[
                dict(name='master-node-jt-nn',
                     flavor_id=self.flavor_id,
                     node_processes=['namenode', 'jobtracker'],
                     node_configs={
                         'HDFS': cluster_configs.NN_CONFIG,
                         'MapReduce': cluster_configs.JT_CONFIG
                     },
                     floating_ip_pool=floating_ip_pool,
                     count=1),
                dict(name='master-node-sec-nn-oz',
                     flavor_id=self.flavor_id,
                     node_processes=['secondarynamenode', 'oozie'],
                     node_configs={'JobFlow': cluster_configs.OOZIE_CONFIG},
                     floating_ip_pool=floating_ip_pool,
                     count=1),
                dict(name='worker-node-tt-dn',
                     node_group_template_id=node_group_template_tt_dn_id,
                     count=3),
                dict(name='worker-node-dn',
                     node_group_template_id=node_group_template_dn_id,
                     count=1),
                dict(name='worker-node-tt',
                     node_group_template_id=node_group_template_tt_id,
                     count=1)
            ],
            net_id=internal_neutron_net)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                node_group_template_id_list=node_group_template_id_list)
            message = 'Failure while cluster template creation: '
            self.print_error_log(message, e)

    # --------------------------Cluster creation--------------------------
    try:
        cluster_name = "%s-%s-v1" % (self.common_config.CLUSTER_NAME,
                                     self.vanilla_config.PLUGIN_NAME)
        self.create_cluster(name=cluster_name,
                            plugin_config=self.vanilla_config,
                            cluster_template_id=cluster_template_id,
                            description='test cluster',
                            cluster_configs={})
        cluster_info = self.get_cluster_info(self.vanilla_config)
        self.await_active_workers_for_namenode(cluster_info['node_info'],
                                               self.vanilla_config)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(self.cluster_id, cluster_template_id,
                                node_group_template_id_list)
            message = 'Failure while cluster creation: '
            self.print_error_log(message, e)

    # ---------------------------CINDER TESTING---------------------------
    try:
        self.cinder_volume_testing(cluster_info)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(cluster_info['cluster_id'],
                                cluster_template_id,
                                node_group_template_id_list)
            message = 'Failure while Cinder testing: '
            self.print_error_log(message, e)

    # ----------------------CLUSTER CONFIG TESTING-------------------------
    try:
        self.cluster_config_testing(cluster_info)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(cluster_info['cluster_id'],
                                cluster_template_id,
                                node_group_template_id_list)
            message = 'Failure while cluster config testing: '
            self.print_error_log(message, e)

    # ----------------------------EDP TESTING------------------------------
    path = 'sahara/tests/integration/tests/resources/'
    # Read fixtures with context managers so file handles are closed
    # (the original open(...).read() calls leaked them).
    with open(path + 'edp-job.pig') as f:
        pig_job_data = f.read()
    with open(path + 'edp-lib.jar') as f:
        pig_lib_data = f.read()
    with open(path + 'edp-mapreduce.jar') as f:
        mapreduce_jar_data = f.read()
    # This is a modified version of WordCount that takes swift configs
    with open(path + 'edp-java/edp-java.jar') as f:
        java_lib_data = f.read()
    java_configs = {
        "configs": {
            "edp.java.main_class":
                "org.openstack.sahara.examples.WordCount"
        }
    }
    mapreduce_configs = {
        "configs": {
            "mapred.mapper.class": "org.apache.oozie.example.SampleMapper",
            "mapred.reducer.class": "org.apache.oozie.example.SampleReducer"
        }
    }
    mapreduce_streaming_configs = {
        "configs": {
            "edp.streaming.mapper": "/bin/cat",
            "edp.streaming.reducer": "/usr/bin/wc"
        }
    }
    try:
        self.edp_testing(job_type=utils_edp.JOB_TYPE_PIG,
                         job_data_list=[{'pig': pig_job_data}],
                         lib_data_list=[{'jar': pig_lib_data}],
                         swift_binaries=True,
                         hdfs_local_output=True)
        self.edp_testing(job_type=utils_edp.JOB_TYPE_MAPREDUCE,
                         job_data_list=[],
                         lib_data_list=[{'jar': mapreduce_jar_data}],
                         configs=mapreduce_configs,
                         swift_binaries=True,
                         hdfs_local_output=True)
        self.edp_testing(job_type=utils_edp.JOB_TYPE_MAPREDUCE_STREAMING,
                         job_data_list=[],
                         lib_data_list=[],
                         configs=mapreduce_streaming_configs)
        self.edp_testing(job_type=utils_edp.JOB_TYPE_JAVA,
                         job_data_list=[],
                         lib_data_list=[{'jar': java_lib_data}],
                         configs=java_configs,
                         pass_input_output_args=True)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(cluster_info['cluster_id'],
                                cluster_template_id,
                                node_group_template_id_list)
            message = 'Failure while EDP testing: '
            self.print_error_log(message, e)

    # ------------------------MAP REDUCE TESTING---------------------------
    try:
        self.map_reduce_testing(cluster_info)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(cluster_info['cluster_id'],
                                cluster_template_id,
                                node_group_template_id_list)
            message = 'Failure while Map Reduce testing: '
            self.print_error_log(message, e)

    # ---------------------CHECK SWIFT AVAILABILITY------------------------
    try:
        self.check_swift_availability(cluster_info)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(cluster_info['cluster_id'],
                                cluster_template_id,
                                node_group_template_id_list)
            message = 'Failure during check of Swift availability: '
            self.print_error_log(message, e)

    # --------------------------CLUSTER SCALING----------------------------
    if not self.vanilla_config.SKIP_SCALING_TEST:
        change_list = [
            {'operation': 'resize', 'info': ['worker-node-tt-dn', 4]},
            {'operation': 'resize', 'info': ['worker-node-dn', 0]},
            {'operation': 'resize', 'info': ['worker-node-tt', 0]},
            {'operation': 'add',
             'info': ['new-worker-node-tt', 1, node_group_template_tt_id]},
            {'operation': 'add',
             'info': ['new-worker-node-dn', 1, node_group_template_dn_id]}
        ]
        try:
            new_cluster_info = self.cluster_scaling(cluster_info,
                                                    change_list)
            self.await_active_workers_for_namenode(
                new_cluster_info['node_info'], self.vanilla_config)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                self.delete_objects(cluster_info['cluster_id'],
                                    cluster_template_id,
                                    node_group_template_id_list)
                message = 'Failure while cluster scaling: '
                self.print_error_log(message, e)

        # ------------------CINDER TESTING AFTER SCALING-------------------
        try:
            self.cinder_volume_testing(new_cluster_info)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                self.delete_objects(new_cluster_info['cluster_id'],
                                    cluster_template_id,
                                    node_group_template_id_list)
                message = 'Failure while Cinder testing after cluster ' \
                          'scaling: '
                self.print_error_log(message, e)

        # --------------CLUSTER CONFIG TESTING AFTER SCALING---------------
        try:
            self.cluster_config_testing(new_cluster_info)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                self.delete_objects(new_cluster_info['cluster_id'],
                                    cluster_template_id,
                                    node_group_template_id_list)
                message = 'Failure while cluster config testing after ' \
                          'cluster scaling: '
                self.print_error_log(message, e)

        # ----------------MAP REDUCE TESTING AFTER SCALING-----------------
        try:
            self.map_reduce_testing(new_cluster_info)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                self.delete_objects(new_cluster_info['cluster_id'],
                                    cluster_template_id,
                                    node_group_template_id_list)
                message = 'Failure while Map Reduce testing after ' \
                          'cluster scaling: '
                self.print_error_log(message, e)

        # -------------CHECK SWIFT AVAILABILITY AFTER SCALING--------------
        try:
            self.check_swift_availability(new_cluster_info)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                self.delete_objects(new_cluster_info['cluster_id'],
                                    cluster_template_id,
                                    node_group_template_id_list)
                message = 'Failure during check of Swift availability ' \
                          'after cluster scaling: '
                self.print_error_log(message, e)

    # ---------------------DELETE CREATED OBJECTS--------------------------
    self.delete_objects(cluster_info['cluster_id'], cluster_template_id,
                        node_group_template_id_list)
def edp_testing(self, job_type, job_data_list, lib_data_list=None,
                configs=None, pass_input_output_args=False,
                swift_binaries=False, hdfs_local_output=False):
    """Run one EDP job of ``job_type`` against the test cluster.

    Creates a Swift container with random input, builds data sources,
    job binaries and the job itself, launches a job execution and waits
    for it; the container and all created EDP objects are deleted in the
    ``finally`` cleanup.

    Fix over the original: ``swift`` and ``container_name`` are
    initialized before the first ``try`` so the error-path cleanup no
    longer raises ``NameError`` (masking the real failure) when
    ``connect_to_swift()`` itself fails.
    """
    swift = None
    container_name = None
    try:
        swift = self.connect_to_swift()
        container_name = 'Edp-test-%s' % str(uuid.uuid4())[:8]
        swift.put_container(container_name)
        # 10000 random characters as job input.
        swift.put_object(
            container_name, 'input', ''.join(
                random.choice(':' + ' ' + '\n' + string.ascii_lowercase)
                for x in range(10000)))
    except Exception as e:
        with excutils.save_and_reraise_exception():
            # Only attempt cleanup for what was actually created.
            if swift is not None and container_name is not None:
                self.delete_swift_container(swift, container_name)
            print(str(e))

    input_id = None
    output_id = None
    job_id = None
    job_execution = None
    try:
        job_binary_list = []
        lib_binary_list = []
        job_binary_internal_list = []
        swift_input_url = 'swift://%s.sahara/input' % container_name

        if hdfs_local_output:
            # This will create a file in hdfs under the user
            # executing the job (i.e. /usr/hadoop/Edp-test-xxxx-out)
            output_type = "hdfs"
            output_url = container_name + "-out"
        else:
            output_type = "swift"
            output_url = 'swift://%s.sahara/output' % container_name

        # Java jobs don't use data sources.  Input/output paths must
        # be passed as args with corresponding username/password configs
        if not edp.compare_job_type(job_type, edp.JOB_TYPE_JAVA):
            input_id = self._create_data_source(
                'input-%s' % str(uuid.uuid4())[:8], 'swift',
                swift_input_url)
            output_id = self._create_data_source(
                'output-%s' % str(uuid.uuid4())[:8], output_type,
                output_url)

        if job_data_list:
            if swift_binaries:
                self._create_job_binaries(job_data_list,
                                          job_binary_internal_list,
                                          job_binary_list,
                                          swift_connection=swift,
                                          container_name=container_name)
            else:
                self._create_job_binaries(job_data_list,
                                          job_binary_internal_list,
                                          job_binary_list)
        if lib_data_list:
            if swift_binaries:
                self._create_job_binaries(lib_data_list,
                                          job_binary_internal_list,
                                          lib_binary_list,
                                          swift_connection=swift,
                                          container_name=container_name)
            else:
                self._create_job_binaries(lib_data_list,
                                          job_binary_internal_list,
                                          lib_binary_list)

        job_id = self._create_job(
            'Edp-test-job-%s' % str(uuid.uuid4())[:8], job_type,
            job_binary_list, lib_binary_list)
        if not configs:
            configs = {}

        # Append the input/output paths with the swift configs
        # if the caller has requested it...
        if edp.compare_job_type(
                job_type, edp.JOB_TYPE_JAVA) and pass_input_output_args:
            self._add_swift_configs(configs)
            if "args" in configs:
                configs["args"].extend([swift_input_url, output_url])
            else:
                configs["args"] = [swift_input_url, output_url]

        job_execution = self.sahara.job_executions.create(
            job_id, self.cluster_id, input_id, output_id,
            configs=configs)
        if job_execution:
            self._await_job_execution(job_execution)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            print(str(e))
    finally:
        self.delete_swift_container(swift, container_name)
        self._delete_job(job_execution, job_id,
                         job_binary_list + lib_binary_list,
                         job_binary_internal_list, input_id, output_id)
def test_hdp_plugin_gating(self):
    """End-to-end gating scenario for the HDP plugin.

    Creates a node group template and a cluster template, boots a
    cluster, runs Cinder, EDP, Map Reduce and Swift checks, scales the
    cluster, optionally re-runs the checks, and deletes everything it
    created.  On failure, objects created so far are cleaned up and the
    original exception is re-raised.

    Fix over the original: EDP fixture files are read with
    ``with open(...)`` so handles are closed instead of leaked.
    """
    self.hdp_config.IMAGE_ID, self.hdp_config.SSH_USERNAME = (
        self.get_image_id_and_ssh_username(self.hdp_config))

    # Default value of self.common_config.FLOATING_IP_POOL is None
    floating_ip_pool = self.common_config.FLOATING_IP_POOL
    internal_neutron_net = None
    # If Neutron enabled then get ID of floating IP pool and ID of internal
    # Neutron network
    if self.common_config.NEUTRON_ENABLED:
        floating_ip_pool = self.get_floating_ip_pool_id_for_neutron_net()
        internal_neutron_net = self.get_internal_neutron_net_id()

    if not self.hdp_config.SKIP_CINDER_TEST:
        volumes_per_node = 2
        volume_size = 2
    else:
        volumes_per_node = 0
        volume_size = 0

    node_group_template_id_list = []

    # ---------------------------CLUSTER CREATION------------------------
    # ----------------"tt-dn" node group template creation---------------
    try:
        node_group_template_tt_dn_id = self.create_node_group_template(
            name='test-node-group-template-hdp-tt-dn',
            plugin_config=self.hdp_config,
            description='test node group template for HDP plugin',
            volumes_per_node=volumes_per_node,
            volume_size=volume_size,
            node_processes=[
                'TASKTRACKER', 'DATANODE', 'HDFS_CLIENT',
                'MAPREDUCE_CLIENT', 'OOZIE_CLIENT', 'PIG'
            ],
            node_configs={},
            floating_ip_pool=floating_ip_pool)
        node_group_template_id_list.append(node_group_template_tt_dn_id)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            message = 'Failure while \'tt-dn\' node group ' \
                      'template creation: '
            self.print_error_log(message, e)

    # ----------------------Cluster template creation---------------------
    try:
        cluster_template_id = self.create_cluster_template(
            name='test-cluster-template-hdp',
            plugin_config=self.hdp_config,
            description='test cluster template for HDP plugin',
            cluster_configs={},
            node_groups=[
                dict(name='master-node-jt-nn',
                     flavor_id=self.flavor_id,
                     node_processes=[
                         'JOBTRACKER', 'NAMENODE', 'SECONDARY_NAMENODE',
                         'GANGLIA_SERVER', 'NAGIOS_SERVER',
                         'AMBARI_SERVER', 'OOZIE_SERVER'
                     ],
                     node_configs={},
                     floating_ip_pool=floating_ip_pool,
                     count=1),
                dict(name='worker-node-tt-dn',
                     node_group_template_id=node_group_template_tt_dn_id,
                     count=3)
            ],
            net_id=internal_neutron_net)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                node_group_template_id_list=node_group_template_id_list)
            message = 'Failure while cluster template creation: '
            self.print_error_log(message, e)

    # --------------------------Cluster creation--------------------------
    cluster_name = (self.common_config.CLUSTER_NAME + '-' +
                    self.hdp_config.PLUGIN_NAME)
    try:
        self.create_cluster(name=cluster_name,
                            plugin_config=self.hdp_config,
                            cluster_template_id=cluster_template_id,
                            description='test cluster',
                            cluster_configs={})
        cluster_info = self.get_cluster_info(self.hdp_config)
        self.await_active_workers_for_namenode(cluster_info['node_info'],
                                               self.hdp_config)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(self.cluster_id, cluster_template_id,
                                node_group_template_id_list)
            message = 'Failure while cluster creation: '
            self.print_error_log(message, e)

    # ---------------------------CINDER TESTING---------------------------
    try:
        self.cinder_volume_testing(cluster_info)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(cluster_info['cluster_id'],
                                cluster_template_id,
                                node_group_template_id_list)
            message = 'Failure while Cinder testing: '
            self.print_error_log(message, e)

    # ----------------------------EDP TESTING------------------------------
    path = 'sahara/tests/integration/tests/resources/'
    # Read fixtures with context managers so file handles are closed
    # (the original open(...).read() calls leaked them).
    with open(path + 'edp-job.pig') as f:
        pig_job_data = f.read()
    with open(path + 'edp-lib.jar') as f:
        pig_lib_data = f.read()
    with open(path + 'edp-mapreduce.jar') as f:
        mapreduce_jar_data = f.read()
    # This is a modified version of WordCount that takes swift configs
    with open(path + 'edp-java.jar') as f:
        java_lib_data = f.read()
    java_configs = {
        "configs": {
            "edp.java.main_class": "org.apache.hadoop.examples.WordCount"
        }
    }
    mapreduce_configs = {
        "configs": {
            "mapred.mapper.class": "org.apache.oozie.example.SampleMapper",
            "mapred.reducer.class": "org.apache.oozie.example.SampleReducer"
        }
    }
    mapreduce_streaming_configs = {
        "configs": {
            "edp.streaming.mapper": "/bin/cat",
            "edp.streaming.reducer": "/usr/bin/wc"
        }
    }
    try:
        self.edp_testing('Pig', [{'pig': pig_job_data}],
                         [{'jar': pig_lib_data}])
        self.edp_testing('MapReduce', [], [{'jar': mapreduce_jar_data}],
                         mapreduce_configs)
        self.edp_testing('MapReduce.Streaming', [], [],
                         mapreduce_streaming_configs)
        self.edp_testing('Java', [],
                         lib_data_list=[{'jar': java_lib_data}],
                         configs=java_configs,
                         pass_input_output_args=True)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(cluster_info['cluster_id'],
                                cluster_template_id,
                                node_group_template_id_list)
            message = 'Failure while EDP testing: '
            self.print_error_log(message, e)

    # ------------------------MAP REDUCE TESTING---------------------------
    try:
        self.map_reduce_testing(cluster_info)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(cluster_info['cluster_id'],
                                cluster_template_id,
                                node_group_template_id_list)
            message = 'Failure while Map Reduce testing: '
            self.print_error_log(message, e)

    # ---------------------CHECK SWIFT AVAILABILITY------------------------
    try:
        self.check_swift_availability(cluster_info)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(cluster_info['cluster_id'],
                                cluster_template_id,
                                node_group_template_id_list)
            message = 'Failure during check of Swift availability: '
            self.print_error_log(message, e)

    # --------------------------CLUSTER SCALING----------------------------
    # NOTE(review): scaling itself runs unconditionally here while the
    # post-scaling checks below are guarded by SKIP_SCALING_TEST; this
    # mirrors the original control flow but looks inconsistent with the
    # vanilla gating test — confirm intent before "fixing".
    datanode_count_after_resizing = (
        cluster_info['node_info']['datanode_count'] +
        self.hdp_config.SCALE_EXISTING_NG_COUNT)
    change_list = [
        {
            'operation': 'resize',
            'info': ['worker-node-tt-dn', datanode_count_after_resizing]
        },
        {
            'operation': 'add',
            'info': [
                'new-worker-node-tt-dn',
                self.hdp_config.SCALE_NEW_NG_COUNT,
                '%s' % node_group_template_tt_dn_id
            ]
        }
    ]
    try:
        new_cluster_info = self.cluster_scaling(cluster_info, change_list)
        self.await_active_workers_for_namenode(
            new_cluster_info['node_info'], self.hdp_config)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(cluster_info['cluster_id'],
                                cluster_template_id,
                                node_group_template_id_list)
            message = 'Failure while cluster scaling: '
            self.print_error_log(message, e)

    if not self.hdp_config.SKIP_SCALING_TEST:
        # ------------------CINDER TESTING AFTER SCALING-------------------
        try:
            self.cinder_volume_testing(new_cluster_info)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                self.delete_objects(new_cluster_info['cluster_id'],
                                    cluster_template_id,
                                    node_group_template_id_list)
                message = 'Failure while Cinder testing after cluster ' \
                          'scaling: '
                self.print_error_log(message, e)

        # ----------------MAP REDUCE TESTING AFTER SCALING-----------------
        try:
            self.map_reduce_testing(new_cluster_info)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                self.delete_objects(new_cluster_info['cluster_id'],
                                    cluster_template_id,
                                    node_group_template_id_list)
                message = 'Failure while Map Reduce testing after ' \
                          'cluster scaling: '
                self.print_error_log(message, e)

        # -------------CHECK SWIFT AVAILABILITY AFTER SCALING--------------
        try:
            self.check_swift_availability(new_cluster_info)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                self.delete_objects(new_cluster_info['cluster_id'],
                                    cluster_template_id,
                                    node_group_template_id_list)
                message = 'Failure during check of Swift availability ' \
                          'after cluster scaling: '
                self.print_error_log(message, e)

    # ---------------------DELETE CREATED OBJECTS--------------------------
    self.delete_objects(cluster_info['cluster_id'], cluster_template_id,
                        node_group_template_id_list)
def edp_testing(self, job_type, job_data_list, lib_data_list=None,
                configs=None, pass_input_output_args=False,
                swift_binaries=False, hdfs_local_output=False):
    """Run one EDP job of ``job_type`` against the test cluster.

    Creates a Swift container with random input, builds data sources,
    job binaries and the job itself, launches a job execution and waits
    for it; the container and all created EDP objects are deleted in the
    ``finally`` cleanup.

    Fix over the original: ``swift`` and ``container_name`` are
    initialized before the first ``try`` so the error-path cleanup no
    longer raises ``NameError`` (masking the real failure) when
    ``connect_to_swift()`` itself fails.
    """
    swift = None
    container_name = None
    try:
        swift = self.connect_to_swift()
        container_name = 'Edp-test-%s' % str(uuid.uuid4())[:8]
        swift.put_container(container_name)
        # 10000 random characters as job input.
        swift.put_object(
            container_name, 'input', ''.join(
                random.choice(':' + ' ' + '\n' + string.ascii_lowercase)
                for x in range(10000)
            )
        )
    except Exception as e:
        with excutils.save_and_reraise_exception():
            # Only attempt cleanup for what was actually created.
            if swift is not None and container_name is not None:
                self.delete_swift_container(swift, container_name)
            print(str(e))

    input_id = None
    output_id = None
    job_id = None
    job_execution = None
    try:
        job_binary_list = []
        lib_binary_list = []
        job_binary_internal_list = []
        swift_input_url = 'swift://%s.sahara/input' % container_name

        if hdfs_local_output:
            # This will create a file in hdfs under the user
            # executing the job (i.e. /usr/hadoop/Edp-test-xxxx-out)
            output_type = "hdfs"
            output_url = container_name + "-out"
        else:
            output_type = "swift"
            output_url = 'swift://%s.sahara/output' % container_name

        # Java jobs don't use data sources.  Input/output paths must
        # be passed as args with corresponding username/password configs
        if not edp.compare_job_type(job_type, edp.JOB_TYPE_JAVA):
            input_id = self._create_data_source(
                'input-%s' % str(uuid.uuid4())[:8], 'swift',
                swift_input_url)
            output_id = self._create_data_source(
                'output-%s' % str(uuid.uuid4())[:8], output_type,
                output_url)

        if job_data_list:
            if swift_binaries:
                self._create_job_binaries(job_data_list,
                                          job_binary_internal_list,
                                          job_binary_list,
                                          swift_connection=swift,
                                          container_name=container_name)
            else:
                self._create_job_binaries(job_data_list,
                                          job_binary_internal_list,
                                          job_binary_list)
        if lib_data_list:
            if swift_binaries:
                self._create_job_binaries(lib_data_list,
                                          job_binary_internal_list,
                                          lib_binary_list,
                                          swift_connection=swift,
                                          container_name=container_name)
            else:
                self._create_job_binaries(lib_data_list,
                                          job_binary_internal_list,
                                          lib_binary_list)

        job_id = self._create_job(
            'Edp-test-job-%s' % str(uuid.uuid4())[:8], job_type,
            job_binary_list, lib_binary_list)
        if not configs:
            configs = {}

        # Append the input/output paths with the swift configs
        # if the caller has requested it...
        if edp.compare_job_type(
                job_type, edp.JOB_TYPE_JAVA) and pass_input_output_args:
            self._add_swift_configs(configs)
            if "args" in configs:
                configs["args"].extend([swift_input_url, output_url])
            else:
                configs["args"] = [swift_input_url, output_url]

        job_execution = self.sahara.job_executions.create(
            job_id, self.cluster_id, input_id, output_id,
            configs=configs)
        if job_execution:
            self._await_job_execution(job_execution)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            print(str(e))
    finally:
        self.delete_swift_container(swift, container_name)
        self._delete_job(
            job_execution, job_id, job_binary_list + lib_binary_list,
            job_binary_internal_list, input_id, output_id
        )
def get_image_id_and_ssh_username(self, plugin_config):
    """Pick the test image and SSH username for ``plugin_config``.

    Selection order: explicit IMAGE_ID, then IMAGE_NAME, then IMAGE_TAG,
    and finally the fallback tag "sahara_i_tests".  In every case the
    image must also carry the plugin-name tag.  Returns a tuple of
    (image id, ssh username); fails the test when no matching image is
    found or the image lacks the username metadata.
    """
    def print_error_log(parameter, value):
        # Diagnostic for an image that matched but lacks the username
        # metadata key (i.e. was not registered for Sahara).
        print(
            '\nImage with %s "%s" was found in image list but it was '
            'possibly not registered for Sahara. Please, make sure image '
            'was correctly registered.' % (parameter, value))

    def try_get_image_id_and_ssh_username(parameter, value):
        # NOTE: reads ``image`` from the enclosing loop scope below —
        # it must only be called while ``image`` is bound to a match.
        try:
            if not plugin_config.SSH_USERNAME:
                # Username comes from the image metadata when not set
                # in the test configuration.
                return image.id, image.metadata[imgs.PROP_USERNAME]
            else:
                return image.id, plugin_config.SSH_USERNAME
        except KeyError:
            # Missing username metadata: log, then re-raise.
            with excutils.save_and_reraise_exception():
                print_error_log(parameter, value)

    images = self.nova.images.list()

    # If plugin_config.IMAGE_ID is not None then find corresponding image
    # and return its ID and username. If image not found then handle error
    if plugin_config.IMAGE_ID:
        for image in images:
            if image.id == plugin_config.IMAGE_ID:
                return try_get_image_id_and_ssh_username(
                    'ID', plugin_config.IMAGE_ID)
        self.fail(
            '\n\nImage with ID "%s" not found in image list. Please, make '
            'sure you specified right image ID.\n' % plugin_config.IMAGE_ID)

    # If plugin_config.IMAGE_NAME is not None then find corresponding image
    # and return its ID and username. If image not found then handle error
    if plugin_config.IMAGE_NAME:
        for image in images:
            if image.name == plugin_config.IMAGE_NAME:
                return try_get_image_id_and_ssh_username(
                    'name', plugin_config.IMAGE_NAME)
        self.fail(
            '\n\nImage with name "%s" not found in image list. Please, '
            'make sure you specified right image name.\n'
            % plugin_config.IMAGE_NAME)

    # If plugin_config.IMAGE_TAG is not None then find corresponding image
    # and return its ID and username. If image not found then handle error
    if plugin_config.IMAGE_TAG:
        for image in images:
            # Image must carry both the requested tag and the plugin tag.
            if (
                image.metadata.get(imgs.PROP_TAG + '%s'
                                   % plugin_config.IMAGE_TAG)
            ) and (image.metadata.get(imgs.PROP_TAG + (
                    '%s' % plugin_config.PLUGIN_NAME))):
                return try_get_image_id_and_ssh_username(
                    'tag', plugin_config.IMAGE_TAG)
        self.fail(
            '\n\nImage with tag "%s" not found in list of registered '
            'images for Sahara. Please, make sure tag "%s" was added to '
            'image and image was correctly registered.\n'
            % (plugin_config.IMAGE_TAG, plugin_config.IMAGE_TAG))

    # If plugin_config.IMAGE_ID, plugin_config.IMAGE_NAME and
    # plugin_config.IMAGE_TAG are None then image is chosen
    # by tag "sahara_i_tests". If image has tag "sahara_i_tests"
    # (at the same time image ID, image name and image tag were not
    # specified in configuration file of integration tests) then return
    # its ID and username. Found image will be chosen as image for tests.
    # If image with tag "sahara_i_tests" not found then handle error
    for image in images:
        if (image.metadata.get(imgs.PROP_TAG + 'sahara_i_tests')) and (
                image.metadata.get(imgs.PROP_TAG + (
                    '%s' % plugin_config.PLUGIN_NAME))):
            try:
                if not plugin_config.SSH_USERNAME:
                    return image.id, image.metadata[imgs.PROP_USERNAME]
                else:
                    return image.id, plugin_config.SSH_USERNAME
            except KeyError:
                # Missing username metadata on the fallback image.
                with excutils.save_and_reraise_exception():
                    print('\nNone of parameters of image (ID, name, tag)'
                          ' was specified in configuration file of '
                          'integration tests. That is why there was '
                          'attempt to choose image by tag '
                          '"sahara_i_tests" and image with such tag '
                          'was found in image list but it was possibly '
                          'not registered for Sahara. Please, make '
                          'sure image was correctly registered.')
    self.fail(
        '\n\nNone of parameters of image (ID, name, tag) was specified in '
        'configuration file of integration tests. That is why there was '
        'attempt to choose image by tag "sahara_i_tests" but image with '
        'such tag not found in list of registered images for Sahara. '
        'Please, make sure image was correctly registered. Please, '
        'specify one of parameters of image (ID, name or tag) in '
        'configuration file of integration tests.\n')
def wrapper(*args, **kwargs):
    """Invoke the decorated test, logging any failure before re-raising."""
    try:
        fct(*args, **kwargs)
    except Exception as exc:
        # Log with the decorator-supplied message; save_and_reraise_exception
        # re-raises the original exception with its traceback intact.
        with excutils.save_and_reraise_exception():
            ITestCase.print_error_log(message, exc)
def test_hdp_plugin_gating(self):
    """End-to-end gating scenario for the HDP plugin.

    Creates a node group template and a cluster template, provisions a
    cluster, then runs Cinder, EDP, Map Reduce and Swift checks. Unless
    SKIP_SCALING_TEST is set, the cluster is then scaled and the checks
    are repeated on the scaled cluster. Every failure path deletes the
    objects created so far before the exception is re-raised.
    """
    self.hdp_config.IMAGE_ID, self.hdp_config.SSH_USERNAME = (
        self.get_image_id_and_ssh_username(self.hdp_config))

    # Default value of self.common_config.FLOATING_IP_POOL is None
    floating_ip_pool = self.common_config.FLOATING_IP_POOL
    internal_neutron_net = None
    # If Neutron enabled then get ID of floating IP pool and ID of internal
    # Neutron network
    if self.common_config.NEUTRON_ENABLED:
        floating_ip_pool = self.get_floating_ip_pool_id_for_neutron_net()
        internal_neutron_net = self.get_internal_neutron_net_id()

    # Skipping Cinder is expressed by requesting zero volumes.
    if not self.hdp_config.SKIP_CINDER_TEST:
        volumes_per_node = 2
        volume_size = 2
    else:
        volumes_per_node = 0
        volume_size = 0

    # IDs collected here are passed to delete_objects() on every failure
    # path below so partially-created resources are cleaned up.
    node_group_template_id_list = []

# ------------------------------CLUSTER CREATION-------------------------------

# ----------------------"tt-dn" node group template creation-------------------

    try:
        node_group_template_tt_dn_id = self.create_node_group_template(
            name='test-node-group-template-hdp-tt-dn',
            plugin_config=self.hdp_config,
            description='test node group template for HDP plugin',
            volumes_per_node=volumes_per_node,
            volume_size=volume_size,
            node_processes=self.hdp_config.WORKER_NODE_PROCESSES,
            node_configs={},
            floating_ip_pool=floating_ip_pool
        )
        node_group_template_id_list.append(node_group_template_tt_dn_id)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            message = 'Failure while \'tt-dn\' node group ' \
                      'template creation: '
            self.print_error_log(message, e)

# --------------------------Cluster template creation--------------------------

    try:
        cluster_template_id = self.create_cluster_template(
            name='test-cluster-template-hdp',
            plugin_config=self.hdp_config,
            description='test cluster template for HDP plugin',
            cluster_configs={},
            node_groups=[
                dict(
                    name='master-node-jt-nn',
                    flavor_id=self.flavor_id,
                    node_processes=self.hdp_config.MASTER_NODE_PROCESSES,
                    node_configs={},
                    floating_ip_pool=floating_ip_pool,
                    count=1),
                dict(
                    name='worker-node-tt-dn',
                    node_group_template_id=node_group_template_tt_dn_id,
                    count=3)
            ],
            net_id=internal_neutron_net
        )
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                node_group_template_id_list=node_group_template_id_list
            )
            message = 'Failure while cluster template creation: '
            self.print_error_log(message, e)

# ------------------------------Cluster creation-------------------------------

    cluster_name = (self.common_config.CLUSTER_NAME + '-' +
                    self.hdp_config.PLUGIN_NAME)
    try:
        self.create_cluster(
            name=cluster_name,
            plugin_config=self.hdp_config,
            cluster_template_id=cluster_template_id,
            description='test cluster',
            cluster_configs={}
        )
        cluster_info = self.get_cluster_info(self.hdp_config)
        # Block until the expected worker daemons report to the namenode.
        self.await_active_workers_for_namenode(cluster_info['node_info'],
                                               self.hdp_config)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                self.cluster_id, cluster_template_id,
                node_group_template_id_list
            )
            message = 'Failure while cluster creation: '
            self.print_error_log(message, e)

# --------------------------------CINDER TESTING-------------------------------

    try:
        self.cinder_volume_testing(cluster_info)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                cluster_info['cluster_id'], cluster_template_id,
                node_group_template_id_list
            )
            message = 'Failure while Cinder testing: '
            self.print_error_log(message, e)

# ---------------------------------EDP TESTING---------------------------------

    path = 'sahara/tests/integration/tests/resources/'
    pig_job_data = open(path + 'edp-job.pig').read()
    pig_lib_data = open(path + 'edp-lib.jar').read()
    mapreduce_jar_data = open(path + 'edp-mapreduce.jar').read()

    # This is a modified version of WordCount that takes swift configs
    java_lib_data = open(path + 'edp-java/edp-java.jar').read()
    java_configs = {
        "configs": {
            "edp.java.main_class": "org.openstack.sahara.examples.WordCount"
        }
    }
    mapreduce_configs = {
        "configs": {
            "mapred.mapper.class": "org.apache.oozie.example.SampleMapper",
            "mapred.reducer.class": "org.apache.oozie.example.SampleReducer"
        }
    }
    mapreduce_streaming_configs = {
        "configs": {
            "edp.streaming.mapper": "/bin/cat",
            "edp.streaming.reducer": "/usr/bin/wc"
        }
    }
    try:
        self.edp_testing(job_type=utils_edp.JOB_TYPE_PIG,
                         job_data_list=[{'pig': pig_job_data}],
                         lib_data_list=[{'jar': pig_lib_data}],
                         swift_binaries=True,
                         hdfs_local_output=True)
        self.edp_testing(job_type=utils_edp.JOB_TYPE_MAPREDUCE,
                         job_data_list=[],
                         lib_data_list=[{'jar': mapreduce_jar_data}],
                         configs=mapreduce_configs,
                         swift_binaries=True,
                         hdfs_local_output=True)
        self.edp_testing(job_type=utils_edp.JOB_TYPE_MAPREDUCE_STREAMING,
                         job_data_list=[],
                         lib_data_list=[],
                         configs=mapreduce_streaming_configs)
        self.edp_testing(job_type=utils_edp.JOB_TYPE_JAVA,
                         job_data_list=[],
                         lib_data_list=[{'jar': java_lib_data}],
                         configs=java_configs,
                         pass_input_output_args=True)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                cluster_info['cluster_id'], cluster_template_id,
                node_group_template_id_list
            )
            message = 'Failure while EDP testing: '
            self.print_error_log(message, e)

# -----------------------------MAP REDUCE TESTING------------------------------

    try:
        self.map_reduce_testing(cluster_info)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                cluster_info['cluster_id'], cluster_template_id,
                node_group_template_id_list
            )
            message = 'Failure while Map Reduce testing: '
            self.print_error_log(message, e)

# --------------------------CHECK SWIFT AVAILABILITY---------------------------

    try:
        self.check_swift_availability(cluster_info)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                cluster_info['cluster_id'], cluster_template_id,
                node_group_template_id_list
            )
            message = 'Failure during check of Swift availability: '
            self.print_error_log(message, e)

# -------------------------------CLUSTER SCALING-------------------------------

    if not self.hdp_config.SKIP_SCALING_TEST:
        # Grow the existing worker group and add a brand-new one built
        # from the same node group template.
        datanode_count_after_resizing = (
            cluster_info['node_info']['datanode_count']
            + self.hdp_config.SCALE_EXISTING_NG_COUNT)
        change_list = [
            {
                'operation': 'resize',
                'info': ['worker-node-tt-dn', datanode_count_after_resizing]
            },
            {
                'operation': 'add',
                'info': [
                    'new-worker-node-tt-dn',
                    self.hdp_config.SCALE_NEW_NG_COUNT,
                    '%s' % node_group_template_tt_dn_id
                ]
            }
        ]
        try:
            new_cluster_info = self.cluster_scaling(cluster_info,
                                                    change_list)
            self.await_active_workers_for_namenode(
                new_cluster_info['node_info'], self.hdp_config)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                self.delete_objects(
                    cluster_info['cluster_id'], cluster_template_id,
                    node_group_template_id_list
                )
                message = 'Failure while cluster scaling: '
                self.print_error_log(message, e)

# -------------------------CINDER TESTING AFTER SCALING------------------------

        try:
            self.cinder_volume_testing(new_cluster_info)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                self.delete_objects(
                    new_cluster_info['cluster_id'], cluster_template_id,
                    node_group_template_id_list
                )
                message = 'Failure while Cinder testing after cluster ' \
                          'scaling: '
                self.print_error_log(message, e)

# ----------------------MAP REDUCE TESTING AFTER SCALING-----------------------

        try:
            self.map_reduce_testing(new_cluster_info)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                self.delete_objects(
                    new_cluster_info['cluster_id'], cluster_template_id,
                    node_group_template_id_list
                )
                message = 'Failure while Map Reduce testing after ' \
                          'cluster scaling: '
                self.print_error_log(message, e)

# -------------------CHECK SWIFT AVAILABILITY AFTER SCALING--------------------

        try:
            self.check_swift_availability(new_cluster_info)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                self.delete_objects(
                    new_cluster_info['cluster_id'], cluster_template_id,
                    node_group_template_id_list
                )
                message = 'Failure during check of Swift availability ' \
                          'after cluster scaling: '
                self.print_error_log(message, e)

# ---------------------------DELETE CREATED OBJECTS----------------------------

    # Final cleanup on the success path (runs whether or not scaling ran).
    self.delete_objects(
        cluster_info['cluster_id'], cluster_template_id,
        node_group_template_id_list
    )
def test_vanilla_plugin_gating(self):
    """End-to-end gating scenario for the Vanilla plugin.

    Runs a transient-cluster check, then creates three node group
    templates and a cluster template, provisions a cluster, and runs
    Cinder, cluster-config, EDP, Map Reduce and Swift checks. The
    cluster is then scaled, the checks are repeated on the scaled
    cluster (unless SKIP_SCALING_TEST is set), and all created objects
    are deleted. Every failure path deletes the objects created so far
    before the exception is re-raised.
    """
    self.vanilla_config.IMAGE_ID, self.vanilla_config.SSH_USERNAME = (
        self.get_image_id_and_ssh_username(self.vanilla_config))

    # Default value of self.common_config.FLOATING_IP_POOL is None
    floating_ip_pool = self.common_config.FLOATING_IP_POOL
    internal_neutron_net = None
    # If Neutron enabled then get ID of floating IP pool and ID of internal
    # Neutron network
    if self.common_config.NEUTRON_ENABLED:
        floating_ip_pool = self.get_floating_ip_pool_id_for_neutron_net()
        internal_neutron_net = self.get_internal_neutron_net_id()

#----------------------------TRANSIENT CLUSTER TESTING-------------------------

    try:
        self.transient_cluster_testing(
            self.vanilla_config, floating_ip_pool, internal_neutron_net)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            message = 'Failure while transient cluster testing: '
            self.print_error_log(message, e)

    # Early exit: nothing else to verify when only the transient
    # cluster scenario was requested.
    if self.vanilla_config.ONLY_TRANSIENT_CLUSTER_TEST:
        return

#-------------------------------CLUSTER CREATION-------------------------------

#---------------------"tt-dn" node group template creation---------------------

    # IDs collected here are passed to delete_objects() on every failure
    # path below so partially-created resources are cleaned up.
    node_group_template_id_list = []

    try:
        node_group_template_tt_dn_id = self.create_node_group_template(
            name='test-node-group-template-vanilla-tt-dn',
            plugin_config=self.vanilla_config,
            description='test node group template for Vanilla plugin',
            node_processes=['tasktracker', 'datanode'],
            node_configs={
                'HDFS': cluster_configs.DN_CONFIG,
                'MapReduce': cluster_configs.TT_CONFIG
            },
            floating_ip_pool=floating_ip_pool
        )
        node_group_template_id_list.append(node_group_template_tt_dn_id)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            message = 'Failure while \'tt-dn\' node group ' \
                      'template creation: '
            self.print_error_log(message, e)

#-----------------------"tt" node group template creation----------------------

    # Skipping Cinder is expressed by requesting zero volumes.
    if not self.vanilla_config.SKIP_CINDER_TEST:
        volumes_per_node = 2
        volume_size = 2
    else:
        volumes_per_node = 0
        volume_size = 0

    try:
        node_group_template_tt_id = self.create_node_group_template(
            name='test-node-group-template-vanilla-tt',
            plugin_config=self.vanilla_config,
            description='test node group template for Vanilla plugin',
            volumes_per_node=volumes_per_node,
            volume_size=volume_size,
            node_processes=['tasktracker'],
            node_configs={
                'MapReduce': cluster_configs.TT_CONFIG
            },
            floating_ip_pool=floating_ip_pool
        )
        node_group_template_id_list.append(node_group_template_tt_id)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                node_group_template_id_list=node_group_template_id_list
            )
            message = 'Failure while \'tt\' node group template creation: '
            self.print_error_log(message, e)

#----------------------"dn" node group template creation-----------------------

    try:
        node_group_template_dn_id = self.create_node_group_template(
            name='test-node-group-template-vanilla-dn',
            plugin_config=self.vanilla_config,
            description='test node group template for Vanilla plugin',
            volumes_per_node=volumes_per_node,
            volume_size=volume_size,
            node_processes=['datanode'],
            node_configs={
                'HDFS': cluster_configs.DN_CONFIG
            },
            floating_ip_pool=floating_ip_pool
        )
        node_group_template_id_list.append(node_group_template_dn_id)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                node_group_template_id_list=node_group_template_id_list
            )
            message = 'Failure while \'dn\' node group template creation: '
            self.print_error_log(message, e)

#---------------------------Cluster template creation--------------------------

    try:
        cluster_template_id = self.create_cluster_template(
            name='test-cluster-template-vanilla',
            plugin_config=self.vanilla_config,
            description='test cluster template for Vanilla plugin',
            cluster_configs={
                'HDFS': cluster_configs.CLUSTER_HDFS_CONFIG,
                'MapReduce': cluster_configs.CLUSTER_MR_CONFIG,
                'general': {'Enable Swift': True}
            },
            node_groups=[
                dict(
                    name='master-node-jt-nn',
                    flavor_id=self.flavor_id,
                    node_processes=['namenode', 'jobtracker'],
                    node_configs={
                        'HDFS': cluster_configs.NN_CONFIG,
                        'MapReduce': cluster_configs.JT_CONFIG
                    },
                    floating_ip_pool=floating_ip_pool,
                    count=1),
                dict(
                    name='master-node-sec-nn-oz',
                    flavor_id=self.flavor_id,
                    node_processes=['secondarynamenode', 'oozie'],
                    node_configs={
                        'JobFlow': cluster_configs.OOZIE_CONFIG
                    },
                    floating_ip_pool=floating_ip_pool,
                    count=1),
                dict(
                    name='worker-node-tt-dn',
                    node_group_template_id=node_group_template_tt_dn_id,
                    count=3),
                dict(
                    name='worker-node-dn',
                    node_group_template_id=node_group_template_dn_id,
                    count=1),
                dict(
                    name='worker-node-tt',
                    node_group_template_id=node_group_template_tt_id,
                    count=1)
            ],
            net_id=internal_neutron_net
        )
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                node_group_template_id_list=node_group_template_id_list
            )
            message = 'Failure while cluster template creation: '
            self.print_error_log(message, e)

#-------------------------------Cluster creation-------------------------------

    try:
        cluster_name = "%s-%s-v1" % (self.common_config.CLUSTER_NAME,
                                     self.vanilla_config.PLUGIN_NAME)
        self.create_cluster(
            name=cluster_name,
            plugin_config=self.vanilla_config,
            cluster_template_id=cluster_template_id,
            description='test cluster',
            cluster_configs={}
        )
        cluster_info = self.get_cluster_info(self.vanilla_config)
        # Block until the expected worker daemons report to the namenode.
        self.await_active_workers_for_namenode(cluster_info['node_info'],
                                               self.vanilla_config)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                self.cluster_id, cluster_template_id,
                node_group_template_id_list
            )
            message = 'Failure while cluster creation: '
            self.print_error_log(message, e)

#---------------------------------CINDER TESTING-------------------------------

    try:
        self.cinder_volume_testing(cluster_info)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                cluster_info['cluster_id'], cluster_template_id,
                node_group_template_id_list
            )
            message = 'Failure while Cinder testing: '
            self.print_error_log(message, e)

#----------------------------CLUSTER CONFIG TESTING----------------------------

    try:
        self.cluster_config_testing(cluster_info)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                cluster_info['cluster_id'], cluster_template_id,
                node_group_template_id_list
            )
            message = 'Failure while cluster config testing: '
            self.print_error_log(message, e)

#----------------------------------EDP TESTING---------------------------------

    path = 'sahara/tests/integration/tests/resources/'
    pig_job_data = open(path + 'edp-job.pig').read()
    pig_lib_data = open(path + 'edp-lib.jar').read()
    mapreduce_jar_data = open(path + 'edp-mapreduce.jar').read()

    # This is a modified version of WordCount that takes swift configs
    java_lib_data = open(path + 'edp-java.jar').read()
    java_configs = {
        "configs": {
            "edp.java.main_class": "org.apache.hadoop.examples.WordCount"
        }
    }
    mapreduce_configs = {
        "configs": {
            "mapred.mapper.class": "org.apache.oozie.example.SampleMapper",
            "mapred.reducer.class": "org.apache.oozie.example.SampleReducer"
        }
    }
    mapreduce_streaming_configs = {
        "configs": {
            "edp.streaming.mapper": "/bin/cat",
            "edp.streaming.reducer": "/usr/bin/wc"
        }
    }
    try:
        self.edp_testing('Pig', [{'pig': pig_job_data}],
                         [{'jar': pig_lib_data}])
        self.edp_testing(
            'MapReduce', [], [{'jar': mapreduce_jar_data}],
            mapreduce_configs
        )
        self.edp_testing(
            'MapReduce.Streaming', [], [], mapreduce_streaming_configs
        )
        self.edp_testing('Java', [],
                         lib_data_list=[{'jar': java_lib_data}],
                         configs=java_configs,
                         pass_input_output_args=True)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                cluster_info['cluster_id'], cluster_template_id,
                node_group_template_id_list
            )
            message = 'Failure while EDP testing: '
            self.print_error_log(message, e)

#------------------------------MAP REDUCE TESTING------------------------------

    try:
        self.map_reduce_testing(cluster_info)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                cluster_info['cluster_id'], cluster_template_id,
                node_group_template_id_list
            )
            message = 'Failure while Map Reduce testing: '
            self.print_error_log(message, e)

#---------------------------CHECK SWIFT AVAILABILITY---------------------------

    try:
        self.check_swift_availability(cluster_info)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                cluster_info['cluster_id'], cluster_template_id,
                node_group_template_id_list
            )
            message = 'Failure during check of Swift availability: '
            self.print_error_log(message, e)

#--------------------------------CLUSTER SCALING-------------------------------

    # NOTE(review): scaling itself runs unconditionally here; only the
    # post-scaling checks below honor SKIP_SCALING_TEST. The HDP gating
    # test gates the scaling operation too — confirm whether this
    # difference is intentional.
    change_list = [
        {
            'operation': 'resize',
            'info': ['worker-node-tt-dn', 4]
        },
        {
            'operation': 'resize',
            'info': ['worker-node-dn', 0]
        },
        {
            'operation': 'resize',
            'info': ['worker-node-tt', 0]
        },
        {
            'operation': 'add',
            'info': [
                'new-worker-node-tt', 1, '%s' % node_group_template_tt_id
            ]
        },
        {
            'operation': 'add',
            'info': [
                'new-worker-node-dn', 1, '%s' % node_group_template_dn_id
            ]
        }
    ]
    try:
        new_cluster_info = self.cluster_scaling(cluster_info, change_list)
        self.await_active_workers_for_namenode(
            new_cluster_info['node_info'], self.vanilla_config)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            self.delete_objects(
                cluster_info['cluster_id'], cluster_template_id,
                node_group_template_id_list
            )
            message = 'Failure while cluster scaling: '
            self.print_error_log(message, e)

    if not self.vanilla_config.SKIP_SCALING_TEST:

#--------------------------CINDER TESTING AFTER SCALING------------------------

        try:
            self.cinder_volume_testing(new_cluster_info)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                self.delete_objects(
                    new_cluster_info['cluster_id'], cluster_template_id,
                    node_group_template_id_list
                )
                message = 'Failure while Cinder testing after cluster ' \
                          'scaling: '
                self.print_error_log(message, e)

#---------------------CLUSTER CONFIG TESTING AFTER SCALING---------------------

        try:
            self.cluster_config_testing(new_cluster_info)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                self.delete_objects(
                    new_cluster_info['cluster_id'], cluster_template_id,
                    node_group_template_id_list
                )
                message = 'Failure while cluster config testing after ' \
                          'cluster scaling: '
                self.print_error_log(message, e)

#-----------------------MAP REDUCE TESTING AFTER SCALING-----------------------

        try:
            self.map_reduce_testing(new_cluster_info)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                self.delete_objects(
                    new_cluster_info['cluster_id'], cluster_template_id,
                    node_group_template_id_list
                )
                message = 'Failure while Map Reduce testing after ' \
                          'cluster scaling: '
                self.print_error_log(message, e)

#--------------------CHECK SWIFT AVAILABILITY AFTER SCALING--------------------

        try:
            self.check_swift_availability(new_cluster_info)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                self.delete_objects(
                    new_cluster_info['cluster_id'], cluster_template_id,
                    node_group_template_id_list
                )
                message = 'Failure during check of Swift availability ' \
                          'after cluster scaling: '
                self.print_error_log(message, e)

#----------------------------DELETE CREATED OBJECTS----------------------------

    # Final cleanup on the success path.
    self.delete_objects(
        cluster_info['cluster_id'], cluster_template_id,
        node_group_template_id_list
    )