def schedule_task(schedule_class, run_settings, local_settings):
    """Dispatch scheduling work for the stage's bootstrapped VMs.

    Refreshes the stage's node list, records the configured retry limit
    in ``local_settings`` (defaulting to 0 when the setting is absent),
    then either reschedules failed processes or starts a fresh schedule.
    """
    schedule_class.nodes = get_registered_vms(
        local_settings, node_type='bootstrapped_nodes')
    try:
        retry_limit = getval(
            run_settings,
            '%s/input/reliability/maximum_retry' % RMIT_SCHEMA)
    except SettingNotFoundException:
        retry_limit = 0
    local_settings['maximum_retry'] = retry_limit
    # Pick the entry point for this pass: reschedule failed processes
    # when any are pending, otherwise run a normal schedule.
    dispatch = (start_reschedule if schedule_class.procs_2b_rescheduled
                else start_schedule)
    dispatch(schedule_class, run_settings, local_settings)
def complete_bootstrap(bootstrap_class, local_settings):
    """Verify registered VMs are still alive and probe bootstrap completion.

    Raises VMTerminatedError internally when fewer VMs are registered
    than the stage recorded as 'running'; that and NoRegisteredVMError
    are both routed to FTManager.manage_failure.  Every surviving node
    not yet listed in ``bootstrapped_nodes`` is then probed for
    bootstrap completion.
    """
    # fix: if get_registered_vms raises NoRegisteredVMError, ``nodes`` was
    # never bound and the loop below raised NameError; default to empty.
    nodes = []
    try:
        nodes = get_registered_vms(local_settings)
        running_created_nodes = [x for x in bootstrap_class.created_nodes
                                 if str(x[3]) == 'running']
        if len(nodes) < len(running_created_nodes):
            raise VMTerminatedError
    except NoRegisteredVMError as e:
        logger.debug('NoRegisteredVMError detected')
        ftmanager = FTManager()
        ftmanager.manage_failure(e, stage_class=bootstrap_class,
                                 settings=local_settings)
    except VMTerminatedError as e:
        logger.debug('VMTerminatedError detected')
        ftmanager = FTManager()
        ftmanager.manage_failure(e, stage_class=bootstrap_class,
                                 settings=local_settings)
    for node in nodes:
        node_ip = node.ip_address
        if not node_ip:
            node_ip = node.private_ip_address
        # Skip nodes already recorded as bootstrapped (x[1] is the IP).
        if (node_ip in [x[1] for x in bootstrap_class.bootstrapped_nodes
                        if x[1] == node_ip]):
            continue
        relative_path_suffix = bootstrap_class.get_relative_output_path(
            local_settings)
        relative_path = "%s@%s" % (local_settings['type'],
                                   relative_path_suffix)
        destination = get_url_with_credentials(local_settings,
                                               relative_path,
                                               is_relative_path=True,
                                               ip_address=node_ip)
        logger.debug("Relative path %s" % relative_path)
        logger.debug("Destination %s" % destination)
        try:
            fin = _is_bootstrap_complete(node_ip, local_settings, destination)
        except IOError as e:  # fix: 'except IOError, e' is invalid on Py3
            logger.error(e)
            fin = False
        except Exception as e:
            logger.error(e)
            fin = False
            ftmanager = FTManager()
            ftmanager.manage_failure(e, stage_class=bootstrap_class,
                                     vm_ip=node_ip, vm_id=node.id,
                                     settings=local_settings)
        # NOTE(review): ``fin`` is computed but not consumed in this block —
        # presumably acted on by code outside this view; confirm.
def _manage_vm_terminated_error(self, kwargs):
    """Mark created nodes as 'failed' when they are no longer registered.

    Expects ``kwargs`` to carry 'stage_class' and 'settings'; a missing
    key is logged at debug level and otherwise ignored.
    """
    try:
        self.stage_class = kwargs['stage_class']
        # IDs of VMs that are still registered with the provider.
        alive_ids = [vm.id for vm in get_registered_vms(kwargs['settings'])]
        for record in self.stage_class.created_nodes:
            # record[0] is the VM id, record[3] its status slot.
            if str(record[0]) not in alive_ids:
                record[3] = 'failed'
    except KeyError as e:
        logger.debug('key_error = %s' % e)
def complete_bootstrap(bootstrap_class, local_settings, id):
    """Check registered VMs are alive and probe bootstrap completion per node.

    Raises VMTerminatedError internally when fewer VMs are registered
    than the stage recorded as 'running'; that and NoRegisteredVMError
    are routed to FTManager.manage_failure.  Each remaining node not yet
    in ``bootstrapped_nodes`` is probed for bootstrap completion.

    NOTE(review): ``id`` is unused in this body and shadows the builtin;
    kept unchanged for the caller's signature.
    """
    logger.debug("complete_bootstrap")
    # fix: if get_registered_vms raises NoRegisteredVMError, ``nodes`` was
    # never bound and the code below raised NameError; default to empty.
    nodes = []
    try:
        nodes = get_registered_vms(local_settings)
        running_created_nodes = [x for x in bootstrap_class.created_nodes
                                 if str(x[3]) == 'running']
        logger.debug("running_created_nodes=%s" % running_created_nodes)
        if len(nodes) < len(running_created_nodes):
            raise VMTerminatedError
    except NoRegisteredVMError as e:
        logger.debug('NoRegisteredVMError detected')
        ftmanager = FTManager()
        ftmanager.manage_failure(e, stage_class=bootstrap_class,
                                 settings=local_settings)
    except VMTerminatedError as e:
        logger.debug('VMTerminatedError detected')
        ftmanager = FTManager()
        ftmanager.manage_failure(e, stage_class=bootstrap_class,
                                 settings=local_settings)
    logger.debug("nodes=%s" % nodes)
    for node in nodes:
        logger.debug("node=%s" % node)
        node_ip = node.ip_address
        if not node_ip:
            node_ip = node.private_ip_address
        logger.debug("node_ip=%s" % node_ip)
        logger.debug("bootstrap_class.bootstrapped_nodes=%s"
                     % bootstrap_class.bootstrapped_nodes)
        # Skip nodes already recorded as bootstrapped (x[1] is the IP).
        node_list = [x[1] for x in bootstrap_class.bootstrapped_nodes
                     if x[1] == node_ip]
        logger.debug("node_list=%s" % node_list)
        if (node_ip in node_list):
            continue
        relative_path_suffix = bootstrap_class.get_relative_output_path(
            local_settings)
        logger.debug("relative_path_suffix=%s" % relative_path_suffix)
        relative_path = "%s@%s" % (local_settings['type'],
                                   relative_path_suffix)
        destination = get_url_with_credentials(local_settings,
                                               relative_path,
                                               is_relative_path=True,
                                               ip_address=node_ip)
        logger.debug("Relative path %s" % relative_path)
        logger.debug("Destination %s" % destination)
        try:
            fin = _is_bootstrap_complete(node_ip, local_settings, destination)
        except IOError as e:  # fix: 'except IOError, e' is invalid on Py3
            logger.error(e)
            fin = False
        except Exception as e:
            logger.error(e)
            fin = False
            ftmanager = FTManager()
            ftmanager.manage_failure(e, stage_class=bootstrap_class,
                                     vm_ip=node_ip, vm_id=node.id,
                                     settings=local_settings)
        # NOTE(review): ``fin`` is computed but not consumed in this block —
        # presumably acted on by code outside this view; confirm.
def start_multi_bootstrap_task(settings, relative_path_suffix):
    """
    Run the package on each of the nodes in the group and grab
    any output as needed.
    """
    nodes = get_registered_vms(settings)
    logger.debug("nodes=%s" % nodes)
    requested_nodes = 0
    maketarget_nodegroup_pair = {}

    # TODO: need testcases for following code
    if not maketarget_nodegroup_pair:
        # No explicit make-target groups: claim every registered node
        # under a single empty target.
        EMPTY_MAKE_TARGET = ''
        requested_nodes = len(nodes)
        maketarget_nodegroup_pair[EMPTY_MAKE_TARGET] = requested_nodes
    else:
        requested_nodes = sum(maketarget_nodegroup_pair.values())
        if requested_nodes > len(nodes):
            message = "Requested nodes %d; but available nodes %s " \
                % (requested_nodes, len(nodes))
            logger.exception(message)
            raise InsufficientResourceError(message)
    logger.info("Requested nodes %d: \nAvailable nodes %s "
                % (requested_nodes, len(nodes)))

    logger.debug('starting setup')
    for make_target, group_size in maketarget_nodegroup_pair.items():
        for _ in range(group_size):
            # Consume nodes from the front of the list, one per slot.
            instance = nodes.pop(0)
            node_ip = instance.ip_address or instance.private_ip_address
            logger.debug("node_ip=%s" % node_ip)
            logger.debug('constructing source')
            source = get_url_with_credentials(settings,
                                              settings['payload_source'])
            logger.debug('source=%s' % source)
            relative_path = '%s@%s' % (settings['type'],
                                       relative_path_suffix)
            destination = get_url_with_credentials(settings,
                                                   relative_path,
                                                   is_relative_path=True,
                                                   ip_address=node_ip)
            logger.debug("Source %s" % source)
            logger.debug("Destination %s" % destination)
            logger.debug("Relative path %s" % relative_path)
            _start_bootstrap(instance, node_ip, settings, source, destination)
def start_multi_bootstrap_task(settings, relative_path_suffix):
    """
    Run the package on each of the nodes in the group and grab
    any output as needed.
    """
    nodes = get_registered_vms(settings)
    logger.debug("nodes=%s" % nodes)
    requested_nodes = 0
    maketarget_nodegroup_pair = {}

    # TODO: need testcases for following code
    if not maketarget_nodegroup_pair:
        # With no explicit make-target groups, take all registered
        # nodes under one empty-string target.
        EMPTY_MAKE_TARGET = ''
        requested_nodes = len(nodes)
        maketarget_nodegroup_pair[EMPTY_MAKE_TARGET] = requested_nodes
    else:
        for count in maketarget_nodegroup_pair.values():
            requested_nodes += count
        if requested_nodes > len(nodes):
            message = "Requested nodes %d; but available nodes %s " \
                % (requested_nodes, len(nodes))
            logger.exception(message)
            raise InsufficientResourceError(message)
    logger.info("Requested nodes %d: \nAvailable nodes %s "
                % (requested_nodes, len(nodes)))

    logger.debug('starting setup')
    for make_target in maketarget_nodegroup_pair:
        for _ in range(maketarget_nodegroup_pair[make_target]):
            # Take the next node off the front of the list.
            instance = nodes.pop(0)
            node_ip = instance.ip_address
            if not node_ip:
                node_ip = instance.private_ip_address
            logger.debug("node_ip=%s" % node_ip)
            logger.debug('constructing source')
            # Note the leading slash: payload_source is rooted here.
            source = get_url_with_credentials(
                settings, "/" + settings['payload_source'])
            logger.debug('source=%s' % source)
            relative_path = '%s@%s' % (settings['type'],
                                       relative_path_suffix)
            destination = get_url_with_credentials(
                settings, relative_path,
                is_relative_path=True, ip_address=node_ip)
            logger.debug("Source %s" % source)
            logger.debug("Destination %s" % destination)
            logger.debug("Relative path %s" % relative_path)
            _start_bootstrap(instance, node_ip, settings, source, destination)
def schedule_task(schedule_class, run_settings, local_settings):
    """Schedule (or reschedule) processes on the stage's bootstrapped VMs.

    Stores the configured retry limit in ``local_settings`` and reports
    progress via messages against the run's numeric id (0 when absent
    or unparsable).
    """
    schedule_class.nodes = get_registered_vms(
        local_settings, node_type='bootstrapped_nodes')
    try:
        retry_limit = getval(
            run_settings,
            '%s/input/reliability/maximum_retry' % RMIT_SCHEMA)
    except SettingNotFoundException:
        retry_limit = 0
    local_settings['maximum_retry'] = retry_limit
    try:
        contextid = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        contextid = 0
    if schedule_class.procs_2b_rescheduled:
        messages.info(run_settings,
                      '%d: rescheduling failed processes' % (contextid))
        start_reschedule(schedule_class, run_settings, local_settings)
    else:
        messages.info(run_settings, '%d: scheduling processes' % contextid)
        start_schedule(schedule_class, run_settings, local_settings)
def schedule_task(schedule_class, run_settings, local_settings):
    """Run one scheduling pass over the stage's bootstrapped VMs.

    Records the retry limit into ``local_settings``, resolves the run id
    (falling back to 0), and hands off to reschedule or schedule.
    """
    schedule_class.nodes = get_registered_vms(
        local_settings, node_type='bootstrapped_nodes')
    # Retry limit is optional in the run settings; default to 0.
    try:
        local_settings['maximum_retry'] = getval(
            run_settings,
            '%s/input/reliability/maximum_retry' % RMIT_SCHEMA)
    except SettingNotFoundException:
        local_settings['maximum_retry'] = 0
    # Run id for progress messages; 0 when missing or non-numeric.
    try:
        run_id = int(getval(run_settings, '%s/system/id' % RMIT_SCHEMA))
    except (SettingNotFoundException, ValueError):
        run_id = 0
    if schedule_class.procs_2b_rescheduled:
        messages.info(run_settings,
                      '%d: rescheduling failed processes' % (run_id))
        start_reschedule(schedule_class, run_settings, local_settings)
    else:
        messages.info(run_settings, '%d: scheduling processes' % run_id)
        start_schedule(schedule_class, run_settings, local_settings)
def complete_schedule(schedule_class, local_settings):
    """Poll each bootstrapped VM and record those whose scheduling finished.

    For every registered node not yet handled for the current mode
    (schedule vs reschedule), probes the node's output path; on
    completion the node is appended to the matching scheduled /
    rescheduled list and the per-node process counters are updated.
    """
    logger.debug("started")
    schedule_class.nodes = get_registered_vms(
        local_settings, node_type='bootstrapped_nodes')
    for node in schedule_class.nodes:
        node_ip = node.ip_address
        logger.debug("node_ip=%s" % node_ip)
        if not node_ip:
            node_ip = node.private_ip_address
        # Skip nodes already recorded for the current mode; x[1] is the IP.
        if (node_ip in [x[1] for x in schedule_class.scheduled_nodes
                        if x[1] == node_ip]) \
                and (not schedule_class.procs_2b_rescheduled):
            logger.debug("skip1")
            continue
        if (node_ip in [x[1] for x in schedule_class.rescheduled_nodes
                        if x[1] == node_ip]) \
                and schedule_class.procs_2b_rescheduled:
            logger.debug("skip2")
            continue
        if not is_vm_running(node):
            # An unlikely situation where the node crashed after is was
            # detected as registered.
            # FIXME: should error nodes be counted as finished?
            # FIXME: remove this instance from created_nodes
            logger.error('Instance %s not running' % node.id)
            logger.debug("skip3")
            continue
        logger.debug('mynode=%s' % node_ip)
        try:
            relative_path = "%s@%s" % (
                local_settings['type'],
                schedule_class.get_relative_output_path(local_settings))
            destination = get_url_with_credentials(
                local_settings, relative_path,
                is_relative_path=True, ip_address=node_ip)
        except Exception as e:  # fix: 'except Exception, e' invalid on Py3
            logger.debug(e)
            # fix: original fell through and used relative_path/destination
            # while they could be unbound (NameError) or stale from a
            # previous iteration; skip this node instead.
            continue
        logger.debug("Relative path %s" % relative_path)
        logger.debug("Destination %s" % destination)
        fin = _is_schedule_complete(node_ip, local_settings, destination)
        logger.debug("fin=%s" % fin)
        if fin:
            logger.debug("done.")
            node_list = schedule_class.scheduled_nodes
            if schedule_class.procs_2b_rescheduled:
                node_list = schedule_class.rescheduled_nodes
            if not (node_ip in [x[1] for x in node_list if x[1] == node_ip]):
                node_list.append(
                    [node.id, node_ip, unicode(node.region), 'running'])
                if schedule_class.procs_2b_rescheduled:
                    scheduled_procs = [
                        x for x in schedule_class.current_processes
                        if x['ip_address'] == node_ip
                        and x['status'] == 'reschedule_ready']
                    schedule_class.total_rescheduled_procs += \
                        len(scheduled_procs)
                    for process in scheduled_procs:
                        process['status'] = 'ready'
                    # NOTE(review): passes the literal string as the keyword
                    # value; presumably a flag was intended — confirm
                    # against update_lookup_table's signature.
                    schedule_class.all_processes = update_lookup_table(
                        schedule_class.all_processes,
                        reschedule_to_ready='reschedule_to_ready')
                else:
                    scheduled_procs = [
                        x['ip_address']
                        for x in schedule_class.current_processes
                        if x['ip_address'] == node_ip]
                    schedule_class.total_scheduled_procs += \
                        len(scheduled_procs)
                # if self.total_scheduled_procs == len(self.current_processes):
                #     break
            else:
                logger.info("We have already " +
                            "scheduled process on node %s" % node_ip)
        else:
            # fix: print statement -> function call; identical output on
            # Py2 for a single argument, valid on Py3.
            print("job still running on %s" % node_ip)
def complete_schedule(schedule_class, local_settings):
    """Poll bootstrapped VMs and record those whose scheduling finished.

    Nodes already handled for the active mode (schedule vs reschedule)
    or not running are skipped; completed nodes are appended to the
    matching list and process counters/statuses are updated.
    """
    logger.debug("started")
    schedule_class.nodes = get_registered_vms(
        local_settings, node_type='bootstrapped_nodes')
    for node in schedule_class.nodes:
        node_ip = node.ip_address
        logger.debug("node_ip=%s" % node_ip)
        if not node_ip:
            node_ip = node.private_ip_address
        # x[1] holds the node's IP in the bookkeeping lists.
        if (node_ip in [x[1] for x in schedule_class.scheduled_nodes
                        if x[1] == node_ip]) \
                and (not schedule_class.procs_2b_rescheduled):
            logger.debug("skip1")
            continue
        if (node_ip in [x[1] for x in schedule_class.rescheduled_nodes
                        if x[1] == node_ip]) \
                and schedule_class.procs_2b_rescheduled:
            logger.debug("skip2")
            continue
        if not is_vm_running(node):
            # An unlikely situation where the node crashed after is was
            # detected as registered.
            # FIXME: should error nodes be counted as finished?
            # FIXME: remove this instance from created_nodes
            logger.error('Instance %s not running' % node.id)
            logger.debug("skip3")
            continue
        logger.debug('mynode=%s' % node_ip)
        try:
            relative_path = "%s@%s" % (
                local_settings['type'],
                schedule_class.get_relative_output_path(local_settings))
            destination = get_url_with_credentials(local_settings,
                                                   relative_path,
                                                   is_relative_path=True,
                                                   ip_address=node_ip)
        except Exception as e:  # fix: 'except Exception, e' invalid on Py3
            logger.debug(e)
            # fix: original continued and used relative_path/destination
            # while they could be unbound (NameError) or stale from an
            # earlier iteration; skip this node instead.
            continue
        logger.debug("Relative path %s" % relative_path)
        logger.debug("Destination %s" % destination)
        fin = _is_schedule_complete(node_ip, local_settings, destination)
        logger.debug("fin=%s" % fin)
        if fin:
            logger.debug("done.")
            node_list = schedule_class.scheduled_nodes
            if schedule_class.procs_2b_rescheduled:
                node_list = schedule_class.rescheduled_nodes
            if not (node_ip in [x[1] for x in node_list if x[1] == node_ip]):
                node_list.append(
                    [node.id, node_ip, unicode(node.region), 'running'])
                if schedule_class.procs_2b_rescheduled:
                    scheduled_procs = [
                        x for x in schedule_class.current_processes
                        if x['ip_address'] == node_ip
                        and x['status'] == 'reschedule_ready']
                    schedule_class.total_rescheduled_procs += len(
                        scheduled_procs)
                    for process in scheduled_procs:
                        process['status'] = 'ready'
                    # NOTE(review): keyword receives its own name as a
                    # string literal; presumably a flag was intended —
                    # confirm against update_lookup_table's signature.
                    schedule_class.all_processes = update_lookup_table(
                        schedule_class.all_processes,
                        reschedule_to_ready='reschedule_to_ready')
                else:
                    scheduled_procs = [
                        x['ip_address']
                        for x in schedule_class.current_processes
                        if x['ip_address'] == node_ip]
                    schedule_class.total_scheduled_procs += len(
                        scheduled_procs)
                # if self.total_scheduled_procs == len(self.current_processes):
                #     break
            else:
                logger.info("We have already " +
                            "scheduled process on node %s" % node_ip)
        else:
            # fix: print statement -> function call; identical output on
            # Py2 for a single argument, valid on Py3.
            print("job still running on %s" % node_ip)