def task_log_success(self, task_id, obj=None, msg='', detail='', **kwargs):
    from api.task.utils import task_log_success

    if obj is None:
        obj = self.server_class(self.dc)

    task_log_success(task_id, msg, obj=obj, detail=detail, **kwargs)

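# A hedged usage sketch for the wrapper above: it only fills in a default log object (a DC-level
# placeholder built by self.server_class) before delegating to api.task.utils.task_log_success.
# The message constant below is hypothetical:
#
#     self.task_log_success(task_id, msg=LOG_SERVICE_UPDATE, detail='status=ok')
#     # equivalent to:
#     # task_log_success(task_id, LOG_SERVICE_UPDATE, obj=self.server_class(self.dc), detail='status=ok')
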
def put(self):
    request, vm = self.request, self.vm

    if vm.locked:
        raise VmIsLocked

    if vm.status not in (vm.STOPPED, vm.RUNNING, vm.NOTCREATED):
        raise VmIsNotOperational('VM is not stopped, running or notcreated')

    if vm.json_changed():
        raise PreconditionRequired('VM definition has changed; Update first')

    ser = VmDcSerializer(request, vm, data=self.data)

    if not ser.is_valid():
        return FailureTaskResponse(request, ser.errors, vm=vm)

    if vm.tasks:
        raise VmHasPendingTasks

    old_dc = vm.dc
    dc = ser.dc
    # Change DC for one VM, repeat this for other VM + Recalculate node & storage resources in target and source
    vm.dc = dc
    vm.save(update_node_resources=True, update_storage_resources=True)
    # Change task log entries DC for target VM
    TaskLogEntry.objects.filter(object_pk=vm.uuid).update(dc=dc)
    # Change related VM backup's DC
    Backup.objects.filter(vm=vm).update(dc=dc)

    for ns in ser.nss:  # Issue #chili-885
        for i in (dc, old_dc):
            Backup.update_resources(ns, vm, i)
            Snapshot.update_resources(ns, vm, i)

    detail = 'Successfully migrated VM %s from datacenter %s to datacenter %s' % (vm.hostname, old_dc.name, dc.name)
    # Will create task log entry in old DC
    res = SuccessTaskResponse(request, detail, vm=vm, msg=LOG_MIGRATE_DC, detail=detail)
    # Create task log entry in new DC too
    task_log_success(task_id_from_task_id(res.data.get('task_id'), dc_id=dc.id), LOG_MIGRATE_DC, obj=vm,
                     detail=detail, update_user_tasks=False)

    return res

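# The double logging above relies on task_id_from_task_id() cloning an existing task ID with a new
# dc_id, so one migration event shows up in the task log of both datacenters. A minimal sketch of
# the pattern (grounded in the code above; no new API assumed):
#
#     source_task_id = res.data.get('task_id')                            # logged in old_dc
#     target_task_id = task_id_from_task_id(source_task_id, dc_id=dc.id)  # same task, target DC scope
#     task_log_success(target_task_id, LOG_MIGRATE_DC, obj=vm, detail=detail, update_user_tasks=False)
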
def remove_user_dc_binding(task_id, user, dc=None):
    """Remove user.dc_bound flag"""
    from api.task.utils import task_log_success  # circular imports

    if not user.dc_bound or (dc and user.dc_bound == dc):  # Nothing to do
        return None

    dc_id = user.dc_bound.id
    user.dc_bound = None
    user.save(update_fields=('dc_bound',))
    task_id = task_id_from_task_id(task_id, tg=TG_DC_UNBOUND, dc_id=dc_id, keep_task_suffix=True)
    task_log_success(task_id, LOG_USER_UPDATE, obj=user, update_user_tasks=False, detail='dc_bound=false')

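# Decision table for the early return above (dc is the DC the user is currently being attached to):
#
#     user.dc_bound    dc given    dc_bound == dc    action
#     -------------    --------    --------------    --------------------------------
#     None             any         -                 return None (nothing to unbind)
#     set              yes         yes               return None (binding still valid)
#     set              yes         no                unbind + log under TG_DC_UNBOUND
#     set              no          -                 unbind + log under TG_DC_UNBOUND
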
def attach_dc_virt_object(task_id, msg, obj, dc, user=None):
    """Attach object into DC and log it"""
    from api.task.utils import task_log_success  # circular imports

    if isinstance(obj, Domain):
        DomainDc.objects.create(dc=dc, domain_id=obj.id)
    elif isinstance(obj, Role):
        obj.dc_set.add(dc)
    else:
        obj.dc.add(dc)

    task_id = task_id_from_task_id(task_id, tg=TG_DC_BOUND, dc_id=dc.id, keep_task_suffix=True)
    task_log_success(task_id, msg, obj=obj, owner=getattr(obj, 'owner', None), user=user, update_user_tasks=False,
                     detail="dc='%s'" % dc.name)

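# The isinstance() dispatch above exists because the DC relation is modeled differently per type:
# Domain goes through an explicit DomainDc table, Role exposes the reverse relation as dc_set, and
# the remaining virt objects carry a plain `dc` many-to-many field. A hedged call sketch (the image
# object and message constant are hypothetical):
#
#     attach_dc_virt_object(task_id, LOG_IMAGE_ATTACH, image, dc, user=request.user)
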
def remove_dc_binding_virt_object(task_id, msg, obj, user=None, dc_id=None):
    """Detach object from DC and log it"""
    from api.task.utils import task_log_success  # circular imports

    if dc_id is None:
        if isinstance(obj, Domain):
            dc_id = obj.dc_bound
        else:
            dc_id = obj.dc_bound.id

    obj.dc_bound = None
    obj.save(update_fields=('dc_bound', 'changed'))
    task_id = task_id_from_task_id(task_id, tg=TG_DC_UNBOUND, dc_id=dc_id, keep_task_suffix=True)
    task_log_success(task_id, msg, obj=obj, owner=getattr(obj, 'owner', None), user=user, update_user_tasks=False,
                     detail='dc_bound=false')

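# Note the asymmetry handled above: for Domain, dc_bound already stores a raw DC id, while other
# virt objects store a Dc foreign key, hence the extra .id lookup. A hedged call sketch (the subnet
# object and message constant are hypothetical):
#
#     remove_dc_binding_virt_object(task_id, LOG_NET_UPDATE, subnet, user=request.user)
#     # subnet.dc_bound is now None; the change is logged under TG_DC_UNBOUND in the original DC
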
def system_update(task_id, dc_id=None, version=None, key=None, cert=None, force=None, **kwargs):
    """
    Update the system on mgmt by running esdc-git-update.
    """
    assert dc_id
    assert version

    SystemUpdateStarted(task_id).send()  # Send info to all active socket.io users
    error = None

    try:
        dc = Dc.objects.get_by_id(dc_id)
        assert dc.is_default()

        reply = update_command(version, key=key, cert=cert, force=force, sudo=not settings.DEBUG, run=True)
        result, error = process_update_reply(reply, 'system', version, logger=logger)

        if error:
            raise MgmtTaskException(result['message'])
        else:
            task_log_success(task_id, kwargs['meta'].get('msg'), obj=dc, task_result=result)

        return result
    finally:
        SystemUpdateFinished(task_id, error=error).send()  # Send info to all active socket.io users

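# Control-flow sketch for the task above: the SystemUpdateStarted/SystemUpdateFinished socket.io
# events always bracket the run (the finally clause fires even when MgmtTaskException is raised)
# and a task log entry is written only on success. A hypothetical invocation (the version string
# and message constant are assumptions; meta['msg'] arrives through **kwargs, as read above):
#
#     system_update(task_id, dc_id=DefaultDc().id, version='v4.0', meta={'msg': LOG_SYSTEM_UPDATE})
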
def node_worker_status_update(task_id, hostname, queue=None, status=None, **kwargs):
    """Check (ping) and update compute node status (called by MgmtDaemon)"""
    logger.debug('Received %s (%s) node worker status: %s', hostname, queue, status)

    if settings.DEBUG:
        logger.warning('DEBUG mode on => skipping status checking of node worker %s@%s', queue, hostname)
        return

    try:
        node = Node.objects.get(hostname=hostname)
    except Node.DoesNotExist:
        logger.warn('Node with hostname=%s does not exist in DB (yet?)', hostname)
        return

    if node.is_initializing():
        logger.info('Node %s is being initialized. Ignoring %s status update', node, status)
        return

    node_is_online = node.is_online()
    node_is_unreachable = node.is_unreachable()

    if not ((status == 'online' and node_is_unreachable) or
            (status == 'offline' and node_is_online) or
            (status == 'unknown' and (node_is_online or node_is_unreachable))):
        logger.info('Node %s is already %s. Ignoring %s status update', node, node.get_status_display(), status)
        return

    logger.info('Double-checking %s node status by using ping', node)
    new_status = None
    up = None

    if status == 'online':
        up = node_ping(node, count=5, all_workers=False, all_up=True)  # fast and image worker must be up

        if up:
            new_status = Node.ONLINE
    elif status == 'offline':
        up = node_ping(node, count=3, all_workers=False, all_up=True)  # fast or image worker must be down

        if not up:
            new_status = Node.UNREACHABLE
    elif status == 'unknown':
        up = node_ping(node, count=3, all_workers=False, all_up=True)  # fast and image worker must be up

        if up and node_is_unreachable:
            new_status = Node.ONLINE
        elif not up and node_is_online:
            new_status = Node.UNREACHABLE

    if new_status:
        logger.warn('All node %s workers are %s. Node %s status is serious', node, 'up' if up else 'down', status)
        node.save_status(new_status)
        logger.warn('Switched node %s status to %s', node, node.get_status_display())
        task_log_success(task_id, LOG_STATUS_UPDATE, obj=node, detail='status="%s"' % node.get_status_display(),
                         update_user_tasks=False)
        node_status_changed.send(task_id, node=node, automatic=True)  # Signal!

        if node.is_online():
            node_online.send(task_id, node=node, automatic=True)  # Signal!
        elif node.is_unreachable():
            node_unreachable.send(task_id, node=node)  # Signal!
    else:
        logger.warn('At least one node %s worker is still up/down. Ignoring %s status update', node, status)

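# Transition table implemented above (reported worker status vs. current node status in the DB):
#
#     reported     acted on only when node is     ping check (node_ping)      new status
#     ---------    ---------------------------    ------------------------    --------------------
#     online       unreachable                    all workers up (count=5)    Node.ONLINE
#     offline      online                         any worker down (count=3)   Node.UNREACHABLE
#     unknown      online or unreachable          up / down (count=3)         ONLINE / UNREACHABLE
#
# Any other combination is ignored, so a flapping worker cannot toggle a node that is already in
# the reported state or is still initializing.
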
def group_modify(self, update=False):
    group = self.group
    request = self.request

    if update:
        # We are deleting users that are no longer assigned to the group, so we have to store all of them before
        # deleting, because we have to update the task log for each user so he can see he was removed from the group
        original_group_users = set(group.user_set.select_related('dc_bound', 'default_dc').all())
    else:
        group.alias = group.name  # just a default
        original_group_users = set()

    ser = self.serializer(request, group, data=self.data, partial=update)

    if not ser.is_valid():
        return FailureTaskResponse(request, ser.errors, obj=group, dc_bound=False)

    ser.save()

    if update:
        msg = LOG_GROUP_UPDATE
        status = HTTP_200_OK
    else:
        msg = LOG_GROUP_CREATE
        status = HTTP_201_CREATED

    connection.on_commit(lambda: group_relationship_changed.send(group_name=ser.object.name))
    res = SuccessTaskResponse(request, ser.data, status=status, obj=group, msg=msg, detail_dict=ser.detail_dict(),
                              dc_bound=False)
    # Let's get the task_id so we use the same one for each log message
    task_id = res.data.get('task_id')
    removed_users = None

    if group.dc_bound and not update:
        attach_dc_virt_object(task_id, LOG_GROUP_ATTACH, group, group.dc_bound, user=request.user)

    if ser.object._users_to_save is not None:
        # Update the task log of users that were attached to or detached from the group
        current_users = set(ser.object._users_to_save)
        added_users = current_users - original_group_users
        removed_users = original_group_users - current_users
        affected_users = current_users.symmetric_difference(original_group_users)

        # Remove user.dc_bound flag for newly added users if the group is attached to multiple DCs or
        # to one DC that is different from user.dc_bound
        if added_users:
            group_dcs_count = group.dc_set.count()

            if group_dcs_count >= 1:
                if group_dcs_count == 1:
                    dc = group.dc_set.get()
                else:
                    dc = None

                for user in added_users:
                    remove_user_dc_binding(task_id, user, dc=dc)

        # Update users that were removed from or added to the group
        for user in affected_users:
            detail = "groups='%s'" % ','.join(user.roles.all().values_list('name', flat=True))
            task_log_success(task_id, LOG_USER_UPDATE, obj=user, owner=user, update_user_tasks=False, detail=detail)

    # Permissions or users for this group were changed, which may affect the cached list of DC admins for DCs
    # which are attached to this group. So we need to clear the list of admins cached for each affected DC.
    if ser.object._permissions_to_save is not None or ser.object._users_to_save is not None:
        for dc in group.dc_set.all():
            User.clear_dc_admin_ids(dc)

    # Users were removed from this group and may lose access to DCs which are attached to this group,
    # so we better set each such user's current_dc to default_dc
    if removed_users:
        for user in removed_users:
            if not user.is_staff:
                user.reset_current_dc()

    return res

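# Set algebra used above for the membership diff; a minimal, self-contained illustration:
#
#     original_group_users = {alice, bob}
#     current_users = {bob, carol}
#     added_users = current_users - original_group_users        # {carol}
#     removed_users = original_group_users - current_users      # {alice}
#     affected_users = current_users ^ original_group_users     # {alice, carol} -> one log entry each
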
def node_sysinfo_cb(result, task_id, node_uuid=None):
    """
    A callback function for updating Node.json (sysinfo).

    node_uuid will be set only if called via API or GUI.
    """
    # In case the callback is called by restarting the erigonesd:fast service on a compute node, the meta dict
    # lacks a lot of information; msg is required as part of exception logging inside the callback decorator,
    # therefore we set it explicitly
    result['meta']['msg'] = LOG_NODE_UPDATE

    if result['returncode'] != 0:
        logger.error('Found nonzero return code in result from esysinfo command on %s', node_uuid)
        raise TaskException(result, 'Got bad return code (%s)' % result['returncode'])

    stdout = result.pop('stdout', '')
    result.pop('stderr', None)
    node_new = False

    try:
        esysinfo = parse_esysinfo(stdout)
        img_sources = esysinfo.pop('img_sources')
        img_initial = esysinfo.pop('img_initial')
    except Exception as e:
        logger.error('Could not parse output from esysinfo command on %s. Error: %s', node_uuid, e)
        logger.exception(e)
        raise TaskException(result, 'Could not parse esysinfo output')
    else:
        uuid = esysinfo['sysinfo']['UUID']

        try:
            node = Node.objects.get(uuid=uuid)
        except Node.DoesNotExist:
            # The head node must be in online state during the admin DC initialization and each compute node
            # must be in online state during ssh key exchange.
            node_new = True
            is_head = not Node.objects.exists()
            logger.warn('Creating NEW node from sysinfo output from %s', node_uuid)
            node = Node.create_from_sysinfo(uuid, esysinfo, status=Node.ONLINE, is_head=is_head)
            node_created.send(task_id, node=node)  # Signal!
            result['message'] = 'Successfully created new compute node %s' % node.hostname
            task_log_success(task_id, msg=LOG_NODE_CREATE, obj=node, task_result=result, update_user_tasks=True)
            sshkey_changed = bool(node.sshkey)

            if node.is_head:
                logger.warn('New node %s is the first node ever created - assuming head node status. '
                            'Initializing mgmt system and creating admin DC', node)
                from api.system.init import init_mgmt

                try:
                    init_mgmt(node, images=img_initial)
                except Exception as e:
                    logger.exception(e)
                    result['message'] = 'Error while initializing admin datacenter (%s)' % e
                    task_log_error(task_id, msg=LOG_NODE_CREATE, obj=node, task_result=result,
                                   update_user_tasks=True)

            logger.info('Saving node %s IP address "%s" into admin network', node, node.ip_address)

            try:  # We should proceed even if the IP address is not registered
                node.ip_address.save()
            except Exception as e:
                logger.exception(e)
            else:
                admin_net = node.ip_address.subnet  # The network was updated by init_mgmt()
                # Reload Subnet object because it is cached inside the node instance
                admin_net = admin_net.__class__.objects.get(pk=admin_net.pk)
                # We need a request object
                request = get_dummy_request(DefaultDc(), 'POST', system_user=True)
                record_cls = RecordView.Record

                if admin_net.dns_domain and admin_net.dns_domain == node.domain_name:
                    logger.info('Creating forward A DNS record for node %s', node)
                    # This will fail silently
                    RecordView.add_or_update_record(request, record_cls.A, admin_net.dns_domain, node.hostname,
                                                    node.address, task_id=task_id, related_obj=node)

                if admin_net.ptr_domain:
                    logger.info('Creating reverse PTR DNS record for node %s', node)
                    # This will fail silently
                    RecordView.add_or_update_record(request, record_cls.PTR, admin_net.ptr_domain,
                                                    record_cls.get_reverse(node.address), node.hostname,
                                                    task_id=task_id, related_obj=node)
        else:
            sshkey_changed = node.sshkey_changed(esysinfo)

            if node.sysinfo_changed(esysinfo) or sshkey_changed:
                logger.warn('Updating node %s json with sysinfo output from %s', node, node_uuid)
                node.update_from_sysinfo(esysinfo)  # Will save public SSH key too
                node_json_changed.send(task_id, node=node)  # Signal!
                result['message'] = 'Successfully updated compute node %s' % node.hostname
                task_log_success(task_id, msg=LOG_NODE_UPDATE, obj=node, task_result=result, update_user_tasks=True)
            else:
                result['message'] = 'No changes detected on compute node %s' % node.hostname
                task_log_success(task_id, msg=LOG_NODE_UPDATE, obj=node, task_result=result, update_user_tasks=True)

    if sshkey_changed:
        logger.warn('SSH key has changed on node %s - creating authorized_keys synchronization tasks', node)

        try:
            run_node_authorized_keys_sync()
        except Exception as e:
            logger.exception(e)

    try:
        run_node_img_sources_sync(node, node_img_sources=img_sources)
    except Exception as e:
        logger.exception(e)

    if node_new:
        node.del_initializing()
        # Used by esdc-ee to change node status to unlicensed
        node_status = getattr(settings, 'VMS_NODE_STATUS_DEFAULT', None)

        if node_status:
            node.save_status(node_status)  # Set node status (most probably to unlicensed)
    else:
        # Always run vm_status_all for an old compute node
        vm_status_all(task_id, node)

        # Sync snapshots and backups for every node storage
        try:
            NodeVmSnapshotList.sync(node)
        except Exception as e:
            logger.exception(e)

    return result

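# DNS bootstrapping sketch for a brand-new node (values hypothetical): with
# node.hostname = 'node01.local' and node.address = '10.0.0.11', the block above would attempt:
#
#     A   record: node01.local -> 10.0.0.11                (zone: admin_net.dns_domain)
#     PTR record: 11.0.0.10.in-addr.arpa -> node01.local   (zone: admin_net.ptr_domain)
#
# assuming record_cls.get_reverse() yields the usual in-addr.arpa form; both records go through
# RecordView.add_or_update_record(), which fails silently as noted in the code above.
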
def node_sysinfo_cb(result, task_id, node_uuid=None):
    """
    A callback function for updating Node.json (sysinfo).

    node_uuid will be set only if called via API or GUI.
    """
    # In case the callback is called by restarting the erigonesd:fast service on a compute node, the meta dict
    # lacks a lot of information; msg is required as part of exception logging inside the callback decorator,
    # therefore we set it explicitly
    result['meta']['msg'] = LOG_NODE_UPDATE

    if result['returncode'] != 0:
        logger.error('Found nonzero return code in result from esysinfo command on %s', node_uuid)
        raise TaskException(result, 'Got bad return code (%s)' % result['returncode'])

    stdout = result.pop('stdout', '')
    result.pop('stderr', None)
    node_new = False

    try:
        esysinfo = parse_esysinfo(stdout)
        img_sources = esysinfo.pop('img_sources')
        img_initial = esysinfo.pop('img_initial')
    except Exception as e:
        logger.error('Could not parse output from esysinfo command on %s. Error: %s', node_uuid, e)
        logger.exception(e)
        raise TaskException(result, 'Could not parse esysinfo output')
    else:
        uuid = esysinfo['sysinfo']['UUID']

        try:
            node = Node.objects.get(uuid=uuid)
        except Node.DoesNotExist:
            # The head node must be in online state during the admin DC initialization and each compute node
            # must be in online state during ssh key exchange.
            node_new = True
            is_head = not Node.objects.exists()
            logger.warn('Creating NEW node from sysinfo output from %s', node_uuid)
            node = Node.create_from_sysinfo(uuid, esysinfo, status=Node.ONLINE, is_head=is_head)
            node_created.send(task_id, node=node)  # Signal!
            result['message'] = 'Successfully created new compute node %s' % node.hostname
            task_log_success(task_id, msg=LOG_NODE_CREATE, obj=node, task_result=result, update_user_tasks=True)
            sshkey_changed = bool(node.sshkey)

            if node.is_head:
                logger.warn('New node %s is the first node ever created - assuming head node status. '
                            'Initializing mgmt system and creating admin DC', node)
                from api.system.init import init_mgmt

                try:
                    init_mgmt(node, images=img_initial)
                except Exception as e:
                    logger.exception(e)
                    result['message'] = 'Error while initializing admin datacenter (%s)' % e
                    task_log_error(task_id, msg=LOG_NODE_CREATE, obj=node, task_result=result,
                                   update_user_tasks=True)

            try:
                _save_node_ip_address(task_id, node)
            except Exception as e:
                logger.exception(e)
        else:
            sshkey_changed = node.sshkey_changed(esysinfo)
            sysinfo_changed = node.sysinfo_changed(esysinfo)

            if sysinfo_changed or sshkey_changed:
                logger.warn('Updating node %s json with sysinfo output from %s', node, node_uuid)
                node.update_from_sysinfo(esysinfo)  # Will save public SSH key too
                node_json_changed.send(task_id, node=node)  # Signal!
                result['message'] = 'Successfully updated compute node %s' % node.hostname
            else:
                node_json_unchanged.send(task_id, node=node)  # Signal!
                result['message'] = 'No changes detected on compute node %s' % node.hostname

            task_log_success(task_id, msg=LOG_NODE_UPDATE, obj=node, task_result=result, update_user_tasks=True)

    if sshkey_changed:
        logger.warn('SSH key has changed on node %s - creating authorized_keys synchronization tasks', node)

        try:
            run_node_authorized_keys_sync()
        except Exception as e:
            logger.exception(e)

    try:
        run_node_img_sources_sync(node, node_img_sources=img_sources)
    except Exception as e:
        logger.exception(e)

    if node_new:
        node.del_initializing()
        # Used by esdc-ee to change node status to unlicensed
        node_status = getattr(settings, 'VMS_NODE_STATUS_DEFAULT', None)

        if node_status:
            node.save_status(node_status)  # Set node status (most probably to unlicensed)
    else:
        # Always run vm_status_all for an old compute node
        vm_status_all(task_id, node)

        # Sync snapshots and backups for every node storage
        try:
            NodeVmSnapshotList.sync(node)
        except Exception as e:
            logger.exception(e)

    # Refresh cached version information + emit event informing about restarted erigonesd:fast
    try:
        del node.system_version
        # Sometimes the node worker does not respond within the given timeout, so we have to try more than once
        for i in range(5):
            if node.system_version:
                break

        logger.info('Node %s has system version %s', node, node.system_version)

        if owner_id_from_task_id(task_id) == TASK_USER:  # internal user ID
            NodeSystemRestarted(node, system_version=node.system_version).send()
    except Exception as e:
        logger.exception(e)

    return result

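# Note on the retry loop above: node.system_version behaves like a cached property that only keeps
# a successful answer, so `del node.system_version` drops the stale value and every access inside
# the range(5) loop re-queries the node worker until it responds (inferred from the comment in the
# code; the property implementation itself is not shown in this section).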