def _refresh_node_resource(self, res):
    """Refresh the stored resource figures of one node.

    Asks the controller object for ``res.uri`` to describe its resources
    (the call is made while holding ``self._nccall_sem``) and copies the
    status, memory, disk and core figures from the reply onto the node
    record returned by ``self._get_node(res.id)``.

    The node is flagged ``'error'`` via ``self._change_node_status`` when
    the reply has a non-zero ``code``, when no node record is found for
    ``res.id``, or when anything in the refresh raises.

    :param res: node record; its ``uri`` and ``id`` attributes are read.
    :return: None
    """
    self._logger.debug("invoked.")
    node = utils.get_conctrller_object(res.uri)
    try:
        with self._nccall_sem:
            rs = node.do_describe_resource()
        if rs['code']:
            self._logger.warn('failed to connect node %s' % res.id)
            self._change_node_status(res, 'error')
            return

        new_res = NodeResource(rs['data']['resource'])
        node_res = self._get_node(res.id)
        if not node_res:
            # The original used a bare ``raise`` here with no active
            # exception; raise a real one so the handler below can report it.
            raise LookupError('node %s not found' % res.id)

        self._change_node_status(node_res, new_res.node_status)
        for field in ('node_status',
                      'mem_size_max', 'mem_size_available',
                      'disk_size_max', 'disk_size_available',
                      'number_cores_max', 'number_cores_available'):
            setattr(node_res, field, getattr(new_res, field))
    except Exception as err:
        # Best effort: any failure marks the node as errored, but the cause
        # is logged instead of being silently dropped.
        self._logger.warn('failed to refresh node %s: %s' % (res.id, err))
        self._change_node_status(res, 'error')
    self._logger.debug("done")
def _refresh_instances_by_list(self, insts):
    """Refresh a batch of instances with one describe call.

    The controller object is taken from the first instance's ``node.uri``
    and asked to describe all ids in ``insts``. Each instance is then passed
    to ``self._refresh_instance`` together with the matching entry from the
    reply, or ``None`` when the reply has no entry for it.

    Nothing happens when ``insts`` is not a non-empty ``list``. A non-zero
    reply ``code`` or an exception during the call is logged and the refresh
    is abandoned.

    :param insts: list of instance records; ``instance_id`` and ``node.uri``
        are read.
    :return: None
    """
    if not isinstance(insts, list) or not insts:
        return

    inst_ids = [inst.instance_id for inst in insts]
    node_uri = insts[0].node.uri
    try:
        node = utils.get_conctrller_object(node_uri)
        with self._nccall_sem:
            rs = node.do_describe_instances(inst_ids)
        if rs['code']:
            self._logger.warn('failed to refresh instances: %s' % str(inst_ids))
            return
    except Exception:
        self._logger.warn('failed to connect node %s' % node_uri)
        return

    # Index the reply by instance id so each instance is matched in O(1).
    new_instances_map = {}
    for data_inst in rs['data']['instances']:
        new_inst = Instance(data_inst)
        new_instances_map[new_inst.instance_id] = new_inst

    for inst in insts:
        self._refresh_instance(inst, new_instances_map.get(inst.instance_id))
def _run_instance_thread(self, instance_id, reservation_id, param_t, image_id,
                         image_url, kernel_id, kernel_url, ramdisk_id,
                         ramdisk_url, net_config, user_id, target_node_id):
    """Start an instance on a node and record it on success.

    Repeats ``do_run_instance`` on the controller object of the target node
    (one call at a time under ``self._nccall_sem``, sleeping one second
    after each failed attempt) until the reply ``code`` is zero or
    ``config.CLUSTER_WAKE_THRESH`` seconds have elapsed. On success a
    ``ClusterInstance`` in ``InstanceState.PENDING`` is added under
    ``self._inst_lock`` and the node's available memory, cores and disk are
    reduced by ``param_t.mem`` / ``param_t.cores`` / ``param_t.disk`` under
    ``self._res_lock``.

    :param target_node_id: id passed to ``self._get_node`` to find the node.
    :return: ``1`` when the node is unknown or the instance could not be
        started; ``None`` otherwise.
    """
    self._logger.debug('invoked')
    res = self._get_node(target_node_id)
    if not res:
        # Without this guard ``res.uri`` below raises AttributeError.
        self._logger.warn('failed to run instance: %s (node %s not found)'
                          % (instance_id, target_node_id))
        return 1

    node_server = utils.get_conctrller_object(res.uri)
    start_time = time.time()
    rs = {'code': 0xFFFF}  # non-zero seed so the loop runs at least once
    while rs['code'] and (time.time() - start_time < config.CLUSTER_WAKE_THRESH):
        with self._nccall_sem:
            rs = node_server.do_run_instance(
                instance_id, reservation_id, param_t, image_id, image_url,
                kernel_id, kernel_url, ramdisk_id, ramdisk_url, net_config,
                user_id)
        if rs['code']:
            time.sleep(1)

    if rs['code']:
        self._logger.warn('failed to run instance: %s' % instance_id)
        return 1

    inst = ClusterInstance.new_instance(
        instance_id, reservation_id, param_t, image_id, image_url, kernel_id,
        kernel_url, ramdisk_id, ramdisk_url, InstanceState.PENDING,
        net_config, user_id, self._get_node(target_node_id))
    with self._inst_lock:
        self._add_instance(inst)

    with self._res_lock:
        # Looked up again under the lock, as the original did.
        node = self._get_node(target_node_id)
        if node:
            node.mem_size_available -= param_t.mem
            node.number_cores_available -= param_t.cores
            node.disk_size_available -= param_t.disk
        else:
            self._logger.warn('node %s not found, resources not updated'
                              % target_node_id)
    self._logger.debug('done')
def view_start_instance(request):
    """Handle the POST that asks for an instance to be started.

    Reads ``name`` from ``request.POST``, loads the matching ``Instance``,
    checks that it belongs to the requesting user, picks a cluster with
    ``_schedule_instance`` and sends ``do_run_instances`` to that cluster's
    controller object.

    Every failure (non-POST request, missing name, unknown instance, foreign
    owner, no cluster chosen, lookup/call error, non-zero reply ``code``)
    falls back to ``INSTANCE_INDEX(request)``.

    :param request: the incoming request; ``method`` and ``POST`` are read.
    :return: ``HttpResponseRedirect("/clc/instance")`` on success, otherwise
        the result of ``INSTANCE_INDEX(request)``.
    """
    if request.method != "POST":
        return INSTANCE_INDEX(request)

    name = request.POST.get('name', None)
    if name is None:
        return INSTANCE_INDEX(request)

    try:
        inst = Instance.objects.get(instance_id=name)
    except Exception:
        return INSTANCE_INDEX(request)

    current_user = auth.get_user(request)
    if inst.user.username != current_user.username:
        return INSTANCE_INDEX(request)

    # send start instance message to cc
    cc_name = _schedule_instance(inst)
    if cc_name is None:
        return INSTANCE_INDEX(request)

    try:
        # A missing cluster row or a failed remote call falls back to the
        # index page like every other failure instead of propagating.
        cc = Cluster.objects.get(name=cc_name)
        cc_server = utils.get_conctrller_object(
            utils.uri_generator(cc.socket.ip, cc.socket.port))
        rs = cc_server.do_run_instances(
            [inst.instance_id],
            None,
            inst.user_id,
            {'cores': inst.params.cores,
             'mem': inst.params.mem,
             'disk': inst.params.disk},
            inst.image.image_id,
            inst.image.local_dev_real,
            None, None, None, None,
            [inst.net.mac],
            None)
    except Exception:
        return INSTANCE_INDEX(request)

    if rs['code'] != 0:
        return INSTANCE_INDEX(request)
    return HttpResponseRedirect("/clc/instance")
def _add_node_thread(self, nid, ip, port):
    """Describe a node and build its ``ClusterResource`` record.

    Calls ``do_describe_resource()`` on the controller object for the URI
    built from ``ip`` and ``port`` (under ``self._nccall_sem``). On a zero
    reply ``code`` the reply's ``data`` is extended with ``uri`` and ``id``
    and wrapped in a ``ClusterResource``. On a non-zero ``code`` the reply's
    message is logged and the function returns. If anything raises, an empty
    ``ClusterResource`` carrying the uri, the id and status ``'error'`` is
    built instead.

    :param nid: id stored on the resource record.
    :param ip: address passed to ``utils.uri_generator``.
    :param port: port passed to ``utils.uri_generator``.
    :return: None
    """
    self._logger.debug('invoked')
    uri = utils.uri_generator(ip, port)
    node = utils.get_conctrller_object(uri)
    try:
        with self._nccall_sem:
            rs = node.do_describe_resource()
        if rs['code'] != 0x0:
            # The reply is indexed like a mapping everywhere else; the
            # original ``rs.data`` attribute access raised, was swallowed by
            # the handler below, and so never reached this return.
            self._logger.warn(rs['data']['msg'])
            return
        res_data = rs['data']
        res_data.update({'uri': uri, 'id': nid})
        res = ClusterResource(res_data)
    except Exception as err:
        self._logger.warn(err)
        res = ClusterResource()
        res.uri = uri
        res.id = nid
        res.node_status = 'error'
    # NOTE(review): ``res`` is not used after this point in the visible
    # text; presumably the code that registers it was cut off - confirm.
def _find_and_reboot_instnce(self, inst_id):
    """Reboot the instance with id ``inst_id`` on the node that hosts it.

    Looks the instance up with ``self._get_instance`` and calls
    ``do_reboot_instance`` on the controller object for ``inst.node.uri``
    (under ``self._nccall_sem``). An unknown instance, a non-zero reply
    ``code`` and an exception during the call are each logged as a warning.

    :param inst_id: id of the instance to reboot.
    :return: None in every case.
    """
    self._logger.debug('invoked')
    inst = self._get_instance(inst_id)
    if inst is None:
        self._logger.warn('instance %s do not exists on cluster' % inst_id)
        return

    node = utils.get_conctrller_object(inst.node.uri)
    try:
        with self._nccall_sem:
            rs = node.do_reboot_instance(inst_id)
        if rs['code'] != 0:
            self._logger.warn('failed to reboot instance %s on node %s'
                              % (inst_id, inst.node.id))
            return
    except Exception:
        # Narrowed from a bare ``except:`` so interpreter-exit signals are
        # no longer reported as a connection failure.
        self._logger.warn('failed to connect node %s' % inst.node.id)
        return
    self._logger.debug('done')
def _cluster_server(self, cluster):
    """Return what ``utils.get_conctrller_object`` builds for ``cluster``.

    The URI handed to it comes from ``utils.uri_generator`` applied to the
    ``ip`` and ``port`` of ``cluster.socket``.
    """
    endpoint = cluster.socket
    uri = utils.uri_generator(endpoint.ip, endpoint.port)
    return utils.get_conctrller_object(uri)
try: inst = Instance.objects.get(instance_id=inst_id) except Exception, ex: return INSTANCE_INDEX(request) cc_server = None for cluster in Cluster.objects.all(): if cc_server: break for node in cluster.nodes.all(): if cc_server: break for inst_t in node.instances.all(): if inst_t.instance_id == inst.instance_id: cc_server = utils.get_conctrller_object(utils.uri_generator(cluster.socket.ip, cluster.socket.port)) break if cc_server is None: return INSTANCE_INDEX(request) try: rs = cc_server.do_terminate_instances([inst.instance_id]) if rs['code'] != 0x0: return INSTANCE_INDEX(request) except Exception, ex: print ex return INSTANCE_INDEX(request) return HttpResponseRedirect("/clc/instance")