def vm_migrate_cb(result, task_id, vm_uuid=None, slave_vm_uuid=None):
    """
    A callback function for api.vm.migrate.views.vm_migrate.
    """
    ghost_vm = SlaveVm.get_by_uuid(slave_vm_uuid)
    msg = result.get('message', '')

    if result['returncode'] == 0 and msg and 'Successfully migrated' in msg:
        # Save node and delete placeholder VM first
        node = ghost_vm.vm.node
        nss = set(ghost_vm.vm.get_node_storages())
        ghost_vm.delete()  # post_delete signal will update node and storage resources
        # Fetch VM after ghost_vm is deleted, because it updates the vm.slave_vms array
        vm = Vm.objects.select_related('node', 'dc').get(uuid=vm_uuid)
        changing_node = vm.node != ghost_vm.vm.node
        json = result.pop('json', None)

        try:  # save json from smartos
            json_active = vm.json.load(json)
            vm.json_active = json_active
            vm.json = json_active
        except Exception as e:
            logger.exception(e)
            logger.error('Could not parse json output from vm_migrate(%s). Error: %s', vm_uuid, e)
            raise TaskException(result, 'Could not parse json output')

        nss.update(list(vm.get_node_storages()))
        # Revert status and set new node (should trigger node resource update)
        vm.revert_notready(save=False)

        if changing_node:
            vm.set_node(node)

        vm.save(update_node_resources=True, update_storage_resources=nss)
        SlaveVm.switch_vm_snapshots_node_storages(vm, nss=nss)
        vm_node_changed.send(task_id, vm=vm, force_update=True)  # Signal!
    else:
        vm = Vm.objects.get(uuid=vm_uuid)
        _vm_migrate_cb_failed(result, task_id, vm, ghost_vm)
        logger.error('Found nonzero returncode in result from vm_migrate(%s). Error: %s', vm_uuid, msg)
        raise TaskException(result, 'Got bad return code (%s). Error: %s' % (result['returncode'], msg))

    task_log_cb_success(result, task_id, vm=vm, **result['meta'])

    if vm.json_changed():
        vm_update(vm)

    return result

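
# Hedged sketch (not part of the original module): the minimal shape of the `result`
# payload that the callbacks in this file consume. Only keys actually read above are
# shown; the sample values are fabricated for illustration.
def _example_migrate_result():
    return {
        'returncode': 0,                         # remote command exit code
        'message': 'Successfully migrated ...',  # matched via substring checks
        'json': '{}',                            # raw VM json, popped and parsed via vm.json.load()
        'meta': {                                # expanded into task_log_cb_success(**result['meta'])
            'apiview': {'method': 'PUT'},        # originating API view information
        },
    }
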
def vm_delete_cb(result, task_id, vm_uuid=None):
    """
    A callback function for api.vm.base.views.vm_manage.
    """
    vm = Vm.objects.select_related('dc').get(uuid=vm_uuid)
    msg = result.get('message', '')

    if result['returncode'] == 0 and msg.find('Successfully deleted') == 0:
        _vm_delete_cb_succeeded(task_id, vm)
    else:
        logger.error('Found nonzero returncode in result from DELETE vm_manage(%s). Error: %s', vm_uuid, msg)

        if is_vm_missing(vm, msg):
            logger.critical('VM %s has vanished from compute node!', vm_uuid)
            _vm_delete_cb_succeeded(task_id, vm)
        else:
            _vm_delete_cb_failed(result, task_id, vm)
            raise TaskException(result, 'Got bad return code (%s). Error: %s' % (result['returncode'], msg))

    task_log_cb_success(result, task_id, vm=vm, **result['meta'])

    return result

def vm_snapshot_list_cb(result, task_id, vm_uuid=None, snap_ids=None):
    """
    A callback function for DELETE api.vm.snapshot.views.vm_snapshot_list.
    """
    snaps = Snapshot.objects.filter(id__in=snap_ids)
    action = result['meta']['apiview']['method']

    if result['returncode'] == 0:
        vm = snaps[0].vm

        if action == 'DELETE':
            snaps.delete()
            result['message'] = 'Snapshots successfully deleted'
    else:
        _vm_snapshot_list_cb_failed(result, task_id, snaps, action)
        msg = result.get('message', '')
        logger.error('Found nonzero returncode in result from %s vm_snapshot_list(%s, %s). Error: %s',
                     action, vm_uuid, snaps, msg)
        raise TaskException(result, 'Got bad return code (%s). Error: %s' % (result['returncode'], msg))

    task_log_cb_success(result, task_id, vm=vm, **result['meta'])

    return result

def vm_screenshot_cb(result, task_id, vm_uuid=None):
    """
    A callback function for api.vm.other.views.vm_screenshot_cb.
    """
    if result['returncode'] == 0:
        vm = Vm.objects.get(uuid=vm_uuid)

        try:
            _ppm = StringIO(zlib.decompress(base64.b64decode(result['image'])))
            _ppm.seek(0)
            ppm = Image.open(_ppm)
            png = StringIO()
            ppm.save(png, format='PNG')
            del _ppm
            del ppm
            png.seek(0)
            _png = base64.b64encode(png.read())
            vm.screenshot = _png
            result['image'] = _png
            result['message'] += '\nScreenshot saved'
            del _png
        except Exception as e:
            logger.exception(e)
            logger.error('Could not parse or save image from vm_screenshot(%s). Error: %s', vm_uuid, e)
            # noinspection PyBroadException
            try:
                del result['image']
            except:
                pass
            raise TaskException(result, 'Could not parse or save screenshot image')
    else:
        logger.error('Found nonzero returncode in result from vm_screenshot(%s). Error: %s',
                     vm_uuid, result.get('message', ''))
        # noinspection PyBroadException
        try:
            del result['image']
        except:
            pass
        raise TaskException(result, 'Did not receive a proper screenshot image')

    vm.tasks_del(task_id)

    return result

def task_log_cb(result, task_id, task_status=None, msg='', vm=None, obj=None, cleanup=False,
                check_returncode=False, **kwargs):
    """
    Log callback -> logs finished task.
    """
    if vm:
        obj = vm

    if not obj:
        try:
            obj = get_task_object(kwargs)
        except ObjectDoesNotExist:
            pass

    if cleanup:
        # Sometimes when an execute task fails with an exception or is revoked, we might need to run things
        # that the callback would run. But now the callback won't run, so emergency cleanup should go into
        # this function.
        try:
            logger.info('Running cleanup for task=%s with task_status=%s', task_id, task_status)
            task_cleanup(result, task_id, task_status, obj, **kwargs)
        except Exception as e:
            logger.exception(e)
            logger.error('Got exception (%s) when doing cleanup for task=%s, task_status=%s, obj=%s',
                         e, task_id, task_status, obj)

    if check_returncode:
        rc = result.get('returncode', None)

        if rc != 0:
            err = 'Got bad return code (%s)' % rc
            err_msg = result.get('message', None)

            if err_msg:
                err += ' Error: %s' % err_msg

            logger.error('Found nonzero returncode in result from %s. Error: %s',
                         kwargs.get('apiview', msg), result)
            raise TaskException(result, err)

    task_log(task_id, msg, obj=obj, task_status=task_status, task_result=result)

    return result

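
# Hedged usage sketch (hypothetical call site, not from this module): routing a plain
# command result through task_log_cb with check_returncode=True turns a nonzero
# returncode into a TaskException before the task is logged. The payload is fabricated.
def _task_log_cb_example(task_id, vm):
    result = {'returncode': 0, 'message': 'done', 'meta': {}}  # fabricated result payload
    return task_log_cb(result, task_id, msg='example command', vm=vm, check_returncode=True)
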
def node_vm_snapshot_sync_cb(result, task_id, nodestorage_id=None):
    """
    A callback function for PUT api.node.snapshot.views.node_vm_snapshot_list a.k.a. node_vm_snapshot_sync.
    """
    ns = NodeStorage.objects.select_related('node', 'storage').get(id=nodestorage_id)
    node = ns.node
    data = result.pop('data', '')

    if result['returncode'] != 0:
        msg = result.get('message', '') or data
        logger.error('Found nonzero returncode in result from PUT node_vm_snapshot_list(%s@%s). Error: %s',
                     ns.zpool, node.hostname, msg)
        raise TaskException(result, 'Got bad return code (%s). Error: %s' % (result['returncode'], msg))

    node_snaps = parse_node_snaps(data)
    logger.info('Found %d snapshots on node storage %s@%s', len(node_snaps), ns.zpool, node.hostname)
    ns_snaps = ns.snapshot_set.select_related('vm').all()
    lost = sync_snapshots(ns_snaps, node_snaps)

    # Remaining snapshots on the compute node are internal snapshots, old lost snapshots which do not exist
    # in DB, or replicated snapshots. Let's count all the remaining es- and as- snapshot sizes as replicated
    # snapshots.
    snap_prefix = Snapshot.USER_PREFIX
    rep_snaps_size = sum(t_long(node_snaps.pop(snap)[1]) for snap in tuple(node_snaps.keys())
                         if snap.startswith(snap_prefix))
    ns.set_rep_snapshots_size(rep_snaps_size)

    # The internal snapshots also include dataset backups on a backup node
    if node.is_backup:
        node_bkps = ns.backup_set.select_related('node', 'vm').filter(type=Backup.DATASET)
        lost_bkps = sync_backups(node_bkps, node_snaps)
    else:
        lost_bkps = 0

    logger.info('Node storage %s@%s has %s bytes of replicated snapshots', ns.zpool, node.hostname,
                rep_snaps_size)
    logger.info('Node storage %s@%s has the following internal/service snapshots: %s',
                ns.zpool, node.hostname, node_snaps.keys())
    # Recalculate snapshot counters for all DCs
    ns.save(update_resources=True, update_dcnode_resources=True, recalculate_vms_size=False,
            recalculate_snapshots_size=True, recalculate_images_size=False, recalculate_backups_size=False,
            recalculate_dc_snapshots_size=ns.dc.all())

    # Remove cached snapshot sum for each VM
    for vm_uuid in ns_snaps.values_list('vm__uuid', flat=True).distinct():
        Snapshot.clear_total_vm_size(vm_uuid)

    if not result['meta'].get('internal'):
        msg = 'Snapshots successfully synced'

        if lost:
            msg += '; WARNING: %d snapshot(s) lost' % lost

        if lost_bkps:
            msg += '; WARNING: %d backup(s) lost' % lost_bkps

        result['message'] = msg
        task_log_cb_success(result, task_id, obj=ns, **result['meta'])

    return result

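
# Hedged sketch of the mapping parse_node_snaps() is assumed to return, inferred only
# from how node_snaps is indexed in this file (info[0] ~ creation timestamp,
# info[1] ~ size, info[2] ~ optional user-visible name); the values are fabricated.
_EXAMPLE_NODE_SNAPS = {
    'es-42': ('1400000000', '1048576', 'daily1'),  # user snapshot (Snapshot.USER_PREFIX)
    'is-7': ('1400000100', '2097152'),             # internal snapshot: dataset backup with id 7
}
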
def vm_update_cb(result, task_id, vm_uuid=None):
    """
    A callback function for api.vm.base.views.vm_manage.
    """
    vm = Vm.objects.select_related('dc').get(uuid=vm_uuid)
    _vm_update_cb_done(result, task_id, vm)
    msg = result.get('message', '')
    force = result['meta']['apiview']['force']

    if result['returncode'] == 0 and (force or msg.find('Successfully updated') >= 0):
        json = result.pop('json', None)

        try:  # save json from smartos
            json_active = vm.json.load(json)
            vm_delete_snapshots_of_removed_disks(vm)  # Do this before updating json and json_active
            vm.json_active = json_active
            vm.json = json_active
        except Exception as e:
            logger.exception(e)
            logger.error('Could not parse json output from PUT vm_manage(%s). Error: %s', vm_uuid, e)
            raise TaskException(result, 'Could not parse json output')
        else:
            vm.save(update_node_resources=True, update_storage_resources=True,
                    update_fields=('enc_json', 'enc_json_active', 'changed'))
            vm_update_ipaddress_usage(vm)
            vm_json_active_changed.send(task_id, vm=vm)  # Signal!
    else:
        logger.error('Found nonzero returncode in result from PUT vm_manage(%s). Error: %s', vm_uuid, msg)
        raise TaskException(result, 'Got bad return code (%s). Error: %s' % (result['returncode'], msg))

    task_log_cb_success(result, task_id, vm=vm, **result['meta'])

    return result

def vm_status_current_cb(result, task_id, vm_uuid=None, force_change=False):
    """
    A callback function for GET api.vm.status.views.vm_status. It is responsible for displaying
    the actual VM status to the user and optionally changing the status in DB.
    """
    stdout = result.pop('stdout', '')
    stderr = result.pop('stderr', '')
    rc = result.pop('returncode')

    if rc != 0:
        logger.error('Found nonzero returncode in result from GET vm_status(%s). Error: %s', vm_uuid, stderr)
        raise TaskException(result, 'Got bad return code (%s). Error: %s' % (rc, stderr))

    line = stdout.strip().split(':')
    result['status'] = line[0]
    result['status_changed'] = False
    vm = Vm.objects.select_related('node', 'slavevm').get(uuid=vm_uuid)

    try:
        if result['status']:
            state = Vm.STATUS_DICT[result['status']]
        else:
            # vmadm list returned with rc=0 and empty stdout => VM does not exist on compute node
            state = Vm.NOTCREATED
            result['status'] = dict(Vm.STATUS).get(state)
    except (KeyError, IndexError):
        result['message'] = 'Unidentified VM status'
    else:
        result['message'] = ''
        state_cache = cache.get(Vm.status_key(vm_uuid))

        # Check and eventually save VM's status
        if _vm_status_check(task_id, vm.node.uuid, vm_uuid, state, state_cache=state_cache,
                            force_change=force_change, change_time=_get_task_time(result, 'exec_time')):
            result['status_changed'] = True

    vm.tasks_del(task_id)

    return result

def vm_status_current_cb(result, task_id, vm_uuid=None):
    """
    A callback function for GET api.vm.status.views.vm_status. It is responsible for displaying
    the actual VM status to the user and optionally changing the status in DB.
    """
    stdout = result.pop('stdout', '')
    stderr = result.pop('stderr', '')
    rc = result.pop('returncode')

    if rc != 0:
        logger.error('Found nonzero returncode in result from GET vm_status(%s). Error: %s', vm_uuid, stderr)
        raise TaskException(result, 'Got bad return code (%s). Error: %s' % (rc, stderr))

    line = stdout.strip().split(':')
    result['status'] = line[0]
    vm = Vm.objects.select_related('node').get(uuid=vm_uuid)

    try:
        state = Vm.STATUS_DICT[result['status']]
        zoneid = int(line[1] or Vm.STOPPED_ZONEID)
    except (KeyError, IndexError):
        result['message'] = 'Unidentified VM status'
    else:
        result['message'] = ''
        state_cache = cache.get(Vm.status_key(vm_uuid))
        zoneid_cache = cache.get(Vm.zoneid_key(vm_uuid))

        if state_cache != state or zoneid_cache != zoneid:
            # Check and eventually save VM's status
            _vm_status_check(task_id, vm.node.uuid, vm_uuid, zoneid, state, state_cache=state_cache,
                             zoneid_cache=zoneid_cache, change_time=_get_task_time(result, 'exec_time'))

    vm.tasks_del(task_id)

    return result

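
# Minimal standalone sketch of the "<status>:<zoneid>" stdout format assumed by both
# vm_status_current_cb variants above. STOPPED_ZONEID is a stand-in for Vm.STOPPED_ZONEID
# (its real value may differ) and the sample line is fabricated.
STOPPED_ZONEID = -1


def _parse_status_line(stdout):
    line = stdout.strip().split(':')          # e.g. 'running:5' -> ['running', '5']
    status = line[0]                          # key into Vm.STATUS_DICT
    zoneid = int(line[1] or STOPPED_ZONEID)   # empty zoneid means the zone is not running
    return status, zoneid


assert _parse_status_line('running:5\n') == ('running', 5)
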
def node_image_cb(result, task_id, nodestorage_id=None, zpool=None, img_uuid=None):
    """
    A callback function for api.node.image.views.node_image.
    """
    ns = NodeStorage.objects.select_related('node').get(id=nodestorage_id)
    img = Image.objects.get(uuid=img_uuid)
    img.del_ns_status(ns)
    method = result['meta']['apiview']['method']
    msg = result.get('message', '')
    log_msg = None
    result.pop('stderr', None)

    if result['returncode'] == 0:
        if method == 'POST':
            if 'Imported image' in msg or 'is already installed, skipping' in msg:
                ns.images.add(img)
                log_msg = LOG_IMG_IMPORT
        elif method == 'DELETE':
            if 'Deleted image' in msg:
                ns.images.remove(img)
                log_msg = LOG_IMG_DELETE

        if log_msg:
            task_log_cb_success(result, task_id, msg=log_msg, obj=ns)
            ns.update_resources(recalculate_vms_size=False, recalculate_backups_size=False,
                                recalculate_images_size=True)
            return result

    logger.error('Found nonzero returncode in result from %s node_image(%s, %s, %s). Error: %s',
                 method, nodestorage_id, zpool, img_uuid, msg)
    raise TaskException(result, 'Got bad return code (%s). Error: %s' % (result['returncode'], msg))

def vm_replica_reinit_cb(result, task_id, vm_uuid=None, slave_vm_uuid=None):
    """
    A callback function for api.vm.replica.views.vm_replica_reinit.
    """
    slave_vm = SlaveVm.get_by_uuid(slave_vm_uuid)
    vm = slave_vm.master_vm
    assert vm.uuid == vm_uuid
    action = result['meta']['apiview']['method']
    result, jsons = _parse_vm_replica_result(result, vm, slave_vm, action, key_json_idx=0,
                                             cb_name='vm_replica_reinit')

    if result['returncode'] != 0:
        if jsons and jsons[0].get('success', False):  # Successfully reversed replication
            slave_vm.last_sync = _parse_last_sync(jsons[0])
            slave_vm.rep_reinit_required = False
            slave_vm.save()

        msg = result['detail']
        logger.error('Found nonzero returncode in result from %s vm_replica_reinit(%s, %s). Error: %s',
                     action, vm_uuid, slave_vm_uuid, msg)
        errmsg = _update_task_result_failure(result, msg)
        raise TaskException(result, errmsg)

    slave_vm.rep_reinit_required = False
    slave_vm.last_sync = _parse_last_sync(jsons[0])
    _save_svc_state(slave_vm, jsons)
    _save_svc_params(slave_vm, jsons)
    slave_vm.save()

    msg = 'Server replica was successfully reinitialized'
    _update_task_result_success(result, slave_vm, action, msg)
    task_log_cb_success(result, task_id, vm=vm, **result['meta'])

    return result

def vm_replica_failover_cb(result, task_id, vm_uuid=None, slave_vm_uuid=None):
    """
    A callback function for api.vm.replica.views.vm_replica_failover.
    """
    slave_vm = SlaveVm.get_by_uuid(slave_vm_uuid, sr=('vm', 'master_vm', 'vm__node', 'vm__dc'))
    vm = slave_vm.master_vm
    assert vm.uuid == vm_uuid
    action = result['meta']['apiview']['method']
    force = result['meta']['apiview']['force']
    result, jsons = _parse_vm_replica_result(result, vm, slave_vm, action, key_json_idx=-1,
                                             cb_name='vm_replica_failover')
    sync_status = _save_svc_state(slave_vm, jsons)

    if result['returncode'] != 0:
        if sync_status is not None:
            slave_vm.save(update_fields=('sync_status',))

        vm.revert_notready()
        msg = result['detail']
        logger.error('Found nonzero returncode in result from %s vm_replica_failover(%s, %s). Error: %s',
                     action, vm_uuid, slave_vm_uuid, msg)
        errmsg = _update_task_result_failure(result, msg)
        raise TaskException(result, errmsg)

    # New master VM was born
    # Delete tasks for the old master
    if force:
        tasks = list(vm.tasks.keys())

        try:
            tasks.remove(task_id)
        except ValueError:
            pass

        _delete_tasks(vm, tasks)

    # Create internal shutdown task of the old master VM
    old_vm_status = result['meta']['apiview']['orig_status']
    _vm_shutdown(vm)

    # Save new master, degrade old master
    slave_vm.master_vm.revert_notready(save=False)
    new_vm = slave_vm.fail_over()

    # Re-check status of the old master (now a degraded slave) because it was shut down,
    # but the state was not saved (it was in the notready state back then)
    vm_status_one(task_id, vm)

    # Continue with promotion of the new master and degradation of the old one
    SlaveVm.switch_vm_snapshots_node_storages(new_vm, nss=vm.get_node_storages())
    # Force update of zabbix
    vm_json_active_changed.send(task_id, vm=new_vm, old_json_active={}, force_update=True)  # Signal!

    if new_vm.node != vm.node:
        vm_node_changed.send(task_id, vm=new_vm, force_update=True)  # Signal!

    msg = 'Server replica was successfully promoted to master'
    _update_task_result_success(result, slave_vm, action, msg)
    task_log_cb_success(result, task_id, vm=new_vm, **result['meta'])

    request = get_dummy_request(vm.dc, method='PUT', system_user=True)
    # Mark pending backups as "lost" :( TODO: implement vm_backup_sync
    new_vm.backup_set.filter(status=Backup.PENDING).update(status=Backup.LOST)

    # Sync snapshots on the new master VM (mark missing snapshots as "lost")
    for disk_id, _ in enumerate(new_vm.json_active_get_disks(), start=1):
        call_api_view(request, 'PUT', vm_snapshot_list, new_vm.hostname, data={'disk_id': disk_id},
                      log_response=True)

    if old_vm_status == Vm.RUNNING:
        # Start the new master VM
        call_api_view(request, 'PUT', vm_status, new_vm.hostname, action='start', log_response=True)

    return result

def image_manage_cb(result, task_id, image_uuid=None, vm_uuid=None, snap_id=None, delete_node_image_tasks=None):
    """
    A callback function for api.image.base.views.image_manage and api.image.base.views.image_snapshot.
    """
    img = Image.objects.select_related('dc_bound', 'owner').get(uuid=image_uuid)
    apiview = result['meta']['apiview']
    action = apiview['method']
    json = result.pop('json', None)

    if snap_id:
        snap = Snapshot.objects.get(id=snap_id)
    else:
        snap = None

    if result['returncode'] == 0:
        if action == 'POST':
            if vm_uuid:  # save json from esimg if the image_snapshot view was called
                try:
                    data = img.json.load(json)
                except Exception as e:
                    # The image won't be usable, but we won't raise an exception
                    logger.error('Could not parse json output from POST image_snapshot(%s, %s, %s). Error: %s',
                                 vm_uuid, snap_id, img, e)
                else:
                    img.manifest = data

            img.status = Image.OK
            img.manifest_active = img.manifest
            img.save()

            if snap:
                snap.save_status(Image.OK)
        elif action == 'PUT':
            img.status = Image.OK
            img.manifest_active = img.manifest
            img.save()
        elif action == 'DELETE':
            wait_for_delete_node_image_tasks(img, delete_node_image_tasks)
            img.delete()
    else:
        _image_manage_cb_failed(result, task_id, img, action, snap=snap)  # Rollback
        msg = result.get('message', '')
        logger.error('Found nonzero returncode in result from %s %s(%s, %s, %s). Error: %s',
                     action, apiview['view'], img, vm_uuid, snap_id, msg)
        raise TaskException(result, 'Got bad return code (%s). Error: %s' % (result['returncode'], msg),
                            obj=img)

    task_log_cb_success(result, task_id, obj=img, **result['meta'])

    return result

def vm_status_cb(result, task_id, vm_uuid=None):
    """
    A callback function for PUT api.vm.status.views.vm_status. Always updates the VM's status in DB.
    """
    vm = Vm.objects.get(uuid=vm_uuid)
    msg = result.get('message', '')
    json = result.pop('json', None)

    if result['returncode'] == 0 and msg and msg.find('Successfully') == 0:
        # json was updated
        if result['meta']['apiview']['update'] and msg.find('Successfully updated') == 0:
            try:  # save json from smartos
                json_active = vm.json.load(json)
                vm_delete_snapshots_of_removed_disks(vm)  # Do this before updating json and json_active
                vm.json_active = json_active
                vm.json = json_active
            except Exception as e:
                logger.exception(e)
                logger.error('Could not parse json output from vm_status(%s). Error: %s', vm_uuid, e)
            else:
                vm.save(update_node_resources=True, update_storage_resources=True,
                        update_fields=('enc_json', 'enc_json_active', 'changed'))
                vm_update_ipaddress_usage(vm)
                vm_json_active_changed.send(task_id, vm=vm)  # Signal!

        change_time = _get_task_time(result, 'exec_time')

        if msg.find('Successfully started') >= 0:
            new_status = Vm.RUNNING
        elif msg.find('Successfully completed stop') >= 0:
            if result['meta']['apiview']['freeze']:
                new_status = Vm.FROZEN
                change_time = _get_task_time(result, 'finish_time')  # Force status save
            else:
                new_status = Vm.STOPPED
        elif msg.find('Successfully completed reboot') >= 0:
            new_status = Vm.RUNNING
        else:
            logger.error('Did not find successful status change in result from vm_status(%s). Error: %s',
                         vm_uuid, msg)
            raise TaskException(result, 'Unknown status (%s)' % msg)
    else:
        logger.error('Found nonzero returncode in result from vm_status(%s). Error: %s', vm_uuid, msg)

        if is_vm_missing(vm, msg):
            logger.critical('VM %s has vanished from compute node!', vm_uuid)

            if vm.status == Vm.STOPPING:
                _save_vm_status(task_id, vm, Vm.STOPPED, change_time=_get_task_time(result, 'finish_time'))
        else:
            _vm_status_cb_failed(result, task_id, vm)

        raise TaskException(result, 'Got bad return code (%s). Error: %s' % (result['returncode'], msg))

    _save_vm_status(task_id, vm, new_status, change_time=change_time)
    task_log_cb_success(result, task_id, vm=vm, **result['meta'])

    return result

def vm_replica_cb(result, task_id, vm_uuid=None, slave_vm_uuid=None):
    """
    A callback function for api.vm.replica.views.vm_replica.
    """
    slave_vm = SlaveVm.get_by_uuid(slave_vm_uuid)
    vm = slave_vm.master_vm
    assert vm.uuid == vm_uuid
    action = result['meta']['apiview']['method']
    result, jsons = _parse_vm_replica_result(result, vm, slave_vm, action)

    if action == 'POST':
        vm.revert_notready()

        if jsons and jsons[0].get('success', False):
            esrep_init = jsons[0]
            # New slave VM was successfully created on the target node
            # noinspection PyTypeChecker
            json_active = pickle.loads(base64.decodestring(esrep_init.pop('slave_json')))
            slave_vm.vm.json = slave_vm.vm.json_active = json_active
            slave_vm.vm.status = Vm.STOPPED
            slave_vm.vm.save(update_fields=('status', 'status_change', 'enc_json', 'enc_json_active',
                                            'changed'))
            slave_vm.last_sync = _parse_last_sync(esrep_init)
        else:
            slave_vm.delete()

    sync_status = _save_svc_state(slave_vm, jsons)
    msg = result['detail']

    if result['returncode'] == 0 and jsons:
        if action == 'POST':
            _save_svc_params(slave_vm, jsons)
            slave_vm.save()
            msg = 'Server replica was successfully initialized'
        elif action == 'PUT':
            _save_svc_params(slave_vm, jsons)
            slave_vm.save()
            msg = 'Server replication service was successfully updated'
        elif action == 'DELETE':
            slave_vm.delete()
            msg = 'Server replica was successfully destroyed'

            # noinspection PyTypeChecker
            if len(jsons[-1]['master_cleaned_disks']) != len(vm.json_active_get_disks()):
                warning = "WARNING: Master server's disks were not cleaned properly"
                result['detail'] += ' msg=' + warning
                msg += '; ' + warning
    else:
        if sync_status is not None:
            slave_vm.save(update_fields=('sync_status',))

        logger.error('Found nonzero returncode in result from %s vm_replica(%s, %s). Error: %s',
                     action, vm_uuid, slave_vm_uuid, msg)
        errmsg = _update_task_result_failure(result, msg)
        raise TaskException(result, errmsg)

    _update_task_result_success(result, slave_vm, action, msg)
    task_log_cb_success(result, task_id, vm=vm, **result['meta'])

    return result

def vm_backup_list_cb(result, task_id, vm_uuid=None, node_uuid=None, bkp_ids=None):
    """
    A callback function for DELETE api.vm.backup.views.vm_backup_list.
    """
    bkps = Backup.objects.select_related('vm', 'dc').filter(id__in=bkp_ids)
    _bkp = bkps[0]
    action = result['meta']['apiview']['method']
    json = result.pop('json', '')
    message = result.get('message', json)
    data = {}
    obj_id = vm_uuid or node_uuid
    success = False

    try:  # save json from esbackup
        data = _bkp.json.load(json)
    except Exception as e:
        logger.error('Could not parse json output from %s vm_backup_list(%s, %s). Error: %s',
                     action, obj_id, bkps, e)
        result['detail'] = message or json
    else:
        success = data.get('success', False)

        try:
            result['detail'] = _vm_backup_cb_detail(data)
        except Exception as ex:
            logger.exception(ex)
            result['detail'] = json.replace('\n', '')

        if _bkp.type == Backup.DATASET:
            _vm_backup_update_snapshots(data, 'written', 'size')  # Update size of remaining backups
            _vm_backup_deleted_last_snapshot_names(data)  # Remove last flag from deleted snapshots

    if result['returncode'] == 0 and success:
        if action == 'DELETE':
            bkps.delete()
            _bkp.update_zpool_resources()
            result['message'] = 'Backups successfully deleted'
    else:
        # noinspection PyTypeChecker
        files = [i['name'] for i in data.get('files', [])]

        if files:
            deleted_bkps = bkps.filter(file_path__in=files)
            logger.warning('Only some backups were deleted in %s vm_backup_list(%s, %s): "%s"',
                           action, obj_id, bkps, deleted_bkps)
            deleted_bkps.delete()
            _bkp.update_zpool_resources()

        _vm_backup_list_cb_failed(result, task_id, bkps, action)
        msg = data.get('msg', message)
        logger.error('Found nonzero returncode in result from %s vm_backup_list(%s, %s). Error: %s',
                     action, obj_id, bkps, msg)
        raise TaskException(result, 'Got bad return code (%s). Error: %s' % (result['returncode'], msg))

    task_log_cb_success(result, task_id, obj=_bkp.vm or _bkp.node, **result['meta'])

    return result

def vm_migrate_cb(result, task_id, vm_uuid=None, slave_vm_uuid=None):
    """
    A callback function for api.vm.migrate.views.vm_migrate.
    """
    ghost_vm = SlaveVm.get_by_uuid(slave_vm_uuid)
    msg = result.get('message', '')

    if result['returncode'] == 0 and msg and 'Successfully migrated' in msg:
        # Save node and delete placeholder VM first
        node = ghost_vm.vm.node
        nss = set(ghost_vm.vm.get_node_storages())
        ghost_vm.delete()  # post_delete signal will update node and storage resources
        # Fetch VM after ghost_vm is deleted, because it updates the vm.slave_vms array
        vm = Vm.objects.select_related('node', 'dc').get(uuid=vm_uuid)
        changing_node = vm.node != ghost_vm.vm.node
        json = result.pop('json', None)

        try:  # save json from smartos
            json_active = vm.json.load(json)
            vm.json_active = json_active
            vm.json = json_active
        except Exception as e:
            logger.exception(e)
            logger.error('Could not parse json output from vm_migrate(%s). Error: %s', vm_uuid, e)
            raise TaskException(result, 'Could not parse json output')

        nss.update(list(vm.get_node_storages()))
        # Revert status and set new node (should trigger node resource update)
        vm.revert_notready(save=False)

        if changing_node:
            vm.set_node(node)

        vm.save(update_node_resources=True, update_storage_resources=nss)
        SlaveVm.switch_vm_snapshots_node_storages(vm, nss=nss)
        vm_node_changed.send(task_id, vm=vm, force_update=True)  # Signal!
    else:
        vm = Vm.objects.get(uuid=vm_uuid)
        _vm_migrate_cb_failed(result, task_id, vm, ghost_vm)
        logger.error('Found nonzero returncode in result from vm_migrate(%s). Error: %s', vm_uuid, msg)
        raise TaskException(result, 'Got bad return code (%s). Error: %s' % (result['returncode'], msg))

    task_log_cb_success(result, task_id, vm=vm, **result['meta'])

    if vm.json_changed():
        logger.info('Running PUT vm_manage(%s), because something (vnc port?) has changed', vm)
        from api.vm.base.views import vm_manage
        from api.utils.request import get_dummy_request
        from api.utils.views import call_api_view
        request = get_dummy_request(vm.dc, method='PUT', system_user=True)
        res = call_api_view(request, 'PUT', vm_manage, vm.hostname)

        if res.status_code == 201:
            logger.warn('PUT vm_manage(%s) was successful: %s', vm, res.data)
        else:
            logger.error('PUT vm_manage(%s) failed: %s (%s): %s', vm, res.status_code, res.status_text,
                         res.data)

    return result

def vm_create_cb(result, task_id, vm_uuid=None):
    """
    A callback function for api.vm.base.views.vm_manage.
    """
    vm = Vm.objects.select_related('dc').get(uuid=vm_uuid)
    msg = result.get('message', '')

    if result['returncode'] == 0 and msg.find('Successfully created') >= 0:
        json = result.pop('json', None)

        try:  # save json from smartos
            json_active = vm.json.load(json)
            vm.json_active = json_active
            vm.json = json_active

            if result['meta']['apiview']['recreate']:
                Snapshot.objects.filter(vm=vm).delete()
                SnapshotDefine.objects.filter(vm=vm).delete()
                BackupDefine.objects.filter(vm=vm).delete()
                vm.save_metadata('installed', False, save=False)
        except Exception as e:
            logger.error('Could not parse json output from POST vm_manage(%s). Error: %s', vm_uuid, e)
            _vm_error(task_id, vm)
            logger.exception(e)
            raise TaskException(result, 'Could not parse json output')
        else:  # save all
            vm.save(update_node_resources=True, update_storage_resources=True)
            vm_update_ipaddress_usage(vm)
            # vm_json_active_changed.send(task_id, vm=vm)  # Signal! -> not needed, vm_deployed is called below
            vm_created.send(task_id, vm=vm)  # Signal!

        if msg.find('Successfully started') < 0:  # VM was created, but could not be started
            logger.error('VM %s was created, but could not be started! Error: %s', vm_uuid, msg)
            _vm_error(task_id, vm)
            raise TaskException(result, 'Initial start failed (%s)' % msg)

        sendmail(vm.owner, 'vm/base/vm_create_subject.txt', 'vm/base/vm_create_email.txt',
                 extra_context={'vm': vm}, user_i18n=True, dc=vm.dc, fail_silently=True)
    else:
        logger.error('Found nonzero returncode in result from POST vm_manage(%s). Error: %s', vm_uuid, msg)
        # Revert status and inform user
        _vm_create_cb_failed(result, task_id, vm)

        if result['meta']['apiview']['recreate'] and msg.find('Successfully deleted') >= 0:
            _vm_error(task_id, vm)  # Something went terribly wrong

        # and FAIL this task
        raise TaskException(result, 'Got bad return code (%s). Error: %s' % (result['returncode'], msg))

    # So far so good. Now wait for deploy_over in vm_status_event_cb
    logger.info('VM %s is waiting for deploy_over...', vm_uuid)
    timer = 0
    repeat = 0

    while not vm.has_deploy_finished():
        if timer > VMS_VM_DEPLOY_TOOLONG:  # 10 minutes is too long
            if repeat == VMS_VM_DEPLOY_TOOLONG_MAX_CYCLES:  # 20 minutes is really too long
                logger.error('VM %s deploy process has timed out!', vm_uuid)
                _vm_error(task_id, vm)
                result['message'] = 'VM %s deploy has timed out' % vm.hostname
                task_log_cb_error(result, task_id, vm=vm, **result['meta'])
                return result

            repeat += 1
            timer = 0
            logger.error('VM %s takes too long to deploy. Sending force stop/start', vm_uuid)
            # noinspection PyUnusedLocal
            tid, err = vm_reset(vm)

        sleep(3.0)
        timer += 3

    logger.info('VM %s is completely deployed!', vm_uuid)
    internal_metadata = vm.json.get('internal_metadata', {}).copy()  # save internal_metadata for email
    vm = Vm.objects.select_related('dc', 'template').get(pk=vm.pk)  # Reload vm
    vm_deployed.send(task_id, vm=vm)  # Signal!
    sendmail(vm.owner, 'vm/base/vm_deploy_subject.txt', 'vm/base/vm_deploy_email.txt', fail_silently=True,
             extra_context={'vm': vm, 'internal_metadata': internal_metadata}, user_i18n=True, dc=vm.dc)

    try:
        result['message'] = '\n'.join(result['message'].strip().split('\n')[:-1])  # Remove "started" stuff
    except Exception as e:
        logger.exception(e)

    task_log_cb_success(result, task_id, vm=vm, **result['meta'])

    try:
        if vm.template:  # Try to create snapshot/backup definitions defined by template
            vm_define_snapshot, vm_define_backup = vm.template.vm_define_snapshot, vm.template.vm_define_backup

            if vm_define_snapshot or vm_define_backup:
                user = User.objects.get(id=user_id_from_task_id(task_id))
                request = get_dummy_request(vm.dc, method='POST', user=user)
                SnapshotDefineView.create_from_template(request, vm, vm_define_snapshot, log=logger)
                BackupDefineView.create_from_template(request, vm, vm_define_backup, log=logger)
    except Exception as e:
        logger.exception(e)

    return result

def vm_backup_cb(result, task_id, vm_uuid=None, node_uuid=None, bkp_id=None):
    """
    A callback function for api.vm.backup.views.vm_backup.
    """
    bkp = Backup.objects.select_related('vm', 'dc').get(id=bkp_id)
    action = result['meta']['apiview']['method']
    json = result.pop('json', '')
    message = result.get('message', json)
    data = {}
    obj_id = vm_uuid or node_uuid
    success = False

    try:  # save json from esbackup
        data = bkp.json.load(json)
    except Exception as e:
        logger.error('Could not parse json output from %s vm_backup(%s, %s). Error: %s',
                     action, obj_id, bkp, e)
        result['detail'] = message or json
    else:
        success = data.get('success', False)

        try:
            result['detail'] = _vm_backup_cb_detail(data)
        except Exception as ex:
            logger.exception(ex)
            result['detail'] = json.replace('\n', '')

    msg = data.get('msg', message)

    if action == 'PUT':
        vm = Vm.objects.get(uuid=vm_uuid)
        obj = vm
    else:
        vm = None
        obj = bkp.vm or bkp.node

    if bkp.type == Backup.DATASET:
        if action == 'POST':
            _vm_backup_update_snapshots(data, 'new_name', 'file_path')  # Update file_path of archived backups
            _vm_backup_deleted_last_snapshot_names(data)  # Remove last flag from deleted snapshots
        elif action == 'DELETE':
            _vm_backup_update_snapshots(data, 'written', 'size')  # Update size of remaining backups
            _vm_backup_deleted_last_snapshot_names(data)  # Remove last flag from deleted snapshots

    if result['returncode'] == 0 and success:
        if action == 'POST':
            if bkp.type == Backup.DATASET:
                bkp.file_path = data.get('backup_snapshot', '')
                bkp.size = data.get('backup_snapshot_size', None)

                if data.get('last_snapshot_name', None):
                    bkp.last = True
            else:
                bkp.file_path = data.get('file', '')
                bkp.size = data.get('size', None)
                bkp.checksum = data.get('checksum', '')

            result['message'] = 'Backup successfully created'

            if bkp.fsfreeze:
                if 'freeze failed' in msg:
                    bkp.fsfreeze = False
                    result['message'] += ' (filesystem freeze failed)'
                    MonitoringBackend.vm_send_alert(bkp.vm, 'Backup %s of server %s@disk-%s was created, '
                                                    'but filesystem freeze failed.'
                                                    % (bkp.name, bkp.vm.hostname, bkp.array_disk_id),
                                                    priority=MonitoringBackend.WARNING)

            bkp.manifest_path = data.get('metadata_file', '')
            bkp.time = data.get('time_elapsed', None)
            bkp.status = bkp.OK
            bkp.save()

            if bkp.define and bkp.define.retention:  # Retention - delete oldest backup
                assert bkp.vm == bkp.define.vm
                assert bkp.disk_id == bkp.define.disk_id
                from api.vm.backup.views import vm_backup_list
                _delete_oldest(Backup, bkp.define, vm_backup_list, 'bkpnames', task_id, LOG_BKPS_DELETE)

            bkp.update_zpool_resources()
        elif action == 'PUT':
            bkp.status = bkp.OK
            bkp.save_status()

            if result['meta']['apiview']['force']:
                # Remove all snapshots
                disk = vm.json_active_get_disks()[result['meta']['apiview']['target_disk_id'] - 1]
                real_disk_id = Snapshot.get_real_disk_id(disk)
                # TODO: check indexes
                Snapshot.objects.filter(vm=vm, disk_id=real_disk_id).delete()

            vm.revert_notready()
            result['message'] = 'Backup successfully restored'
        elif action == 'DELETE':
            bkp.delete()
            bkp.update_zpool_resources()
            result['message'] = 'Backup successfully deleted'
    else:
        _vm_backup_cb_failed(result, task_id, bkp, action, vm=vm)  # Delete backup or update backup status
        logger.error('Found nonzero returncode in result from %s vm_backup(%s, %s). Error: %s',
                     action, obj_id, bkp, msg)
        raise TaskException(result, 'Got bad return code (%s). Error: %s' % (result['returncode'], msg),
                            bkp=bkp)

    task_log_cb_success(result, task_id, obj=obj, **result['meta'])

    return result

def vm_snapshot_sync_cb(result, task_id, vm_uuid=None, disk_id=None):
    """
    A callback function for PUT api.vm.snapshot.views.vm_snapshot_list a.k.a. vm_snapshot_sync.
    """
    vm = Vm.objects.select_related('dc').get(uuid=vm_uuid)
    data = result.pop('data', '')

    if result['returncode'] != 0:
        msg = result.get('message', '') or data
        logger.error('Found nonzero returncode in result from PUT vm_snapshot_list(%s). Error: %s',
                     vm_uuid, msg)
        raise TaskException(result, 'Got bad return code (%s). Error: %s' % (result['returncode'], msg))

    node_snaps = parse_node_snaps(data)
    logger.info('Found %d snapshots for VM %s on disk ID %s', len(node_snaps), vm, disk_id)
    lost = sync_snapshots(vm.snapshot_set.select_related('vm').filter(disk_id=disk_id).all(), node_snaps)

    # Remaining snapshots on the compute node are internal or old lost snapshots which do not exist in DB.
    # Remaining es- and as- snapshots must be created in DB; some is- and rs- snapshots could probably be
    # removed, but these are hard to determine, so we are ignoring them.
    snap_prefix = Snapshot.USER_PREFIX
    new_snaps = {snap: node_snaps.pop(snap) for snap in tuple(node_snaps.keys())
                 if snap.startswith(snap_prefix)}
    ns = vm.get_node_storage(disk_id)

    if new_snaps:
        logger.warn('VM %s has the following snapshots on disk ID %s, which are not defined in DB: %s',
                    vm, disk_id, new_snaps.keys())

        for zfs_name, info in new_snaps.items():
            try:
                name = info[2]
                if not name:
                    raise IndexError
            except IndexError:
                name = info[0]

            try:
                Snapshot.create_from_zfs_name(zfs_name, name=name, timestamp=int(info[0]), vm=vm,
                                              disk_id=disk_id, zpool=ns, size=t_long(info[1]),
                                              note='Found by snapshot sync')
            except Exception as exc:
                logger.error('Could not recreate snapshot %s (vm=%s, disk_id=%s). Error: %s',
                             zfs_name, vm, disk_id, exc)
            else:
                logger.warn('Recreated snapshot %s (vm=%s, disk_id=%s)', zfs_name, vm, disk_id)

    logger.info('VM %s has the following internal/service snapshots on disk ID %s: %s',
                vm, disk_id, node_snaps.keys())
    # Update node storage snapshot size counters
    Snapshot.update_resources(ns, vm)

    try:  # Update last flag on dataset backups
        bkp_ids = [snap[3:] for snap in node_snaps if snap.startswith('is-')]

        if bkp_ids:
            vm.backup_set.filter(disk_id=disk_id, id__in=bkp_ids).update(last=True)
            vm.backup_set.filter(disk_id=disk_id, last=True).exclude(id__in=bkp_ids).update(last=False)
        else:
            vm.backup_set.filter(disk_id=disk_id, last=True).update(last=False)
    except Exception as exc:
        logger.exception(exc)

    msg = 'Snapshots successfully synced'

    if lost:
        msg += '; WARNING: %d snapshot(s) lost' % lost

    if new_snaps:
        msg += '; WARNING: %d snapshot(s) found' % len(new_snaps)

    result['message'] = msg
    task_log_cb_success(result, task_id, vm=vm, **result['meta'])

    return result

def vm_snapshot_cb(result, task_id, vm_uuid=None, snap_id=None):
    """
    A callback function for api.vm.snapshot.views.vm_snapshot.
    """
    snap = Snapshot.objects.select_related('vm').get(id=snap_id)
    vm = snap.vm
    action = result['meta']['apiview']['method']
    msg = result.get('message', '')

    if result['returncode'] == 0:
        if msg:
            result['detail'] = 'msg=' + to_string(msg)
        else:
            result['detail'] = ''

        if action == 'POST':
            snap.status = snap.OK
            result['message'] = 'Snapshot successfully created'

            if snap.fsfreeze:
                if 'freeze failed' in msg:
                    snap.fsfreeze = False
                    result['message'] += ' (filesystem freeze failed)'
                    MonitoringBackend.vm_send_alert(vm, 'Snapshot %s of server %s@disk-%s was created, '
                                                    'but filesystem freeze failed.'
                                                    % (snap.name, vm.hostname, snap.array_disk_id),
                                                    priority=MonitoringBackend.WARNING)

            snap.save(update_fields=('status', 'fsfreeze'))

            if snap.define and snap.define.retention:  # Retention - delete oldest snapshot
                assert vm == snap.define.vm
                assert snap.disk_id == snap.define.disk_id
                from api.vm.snapshot.views import vm_snapshot_list
                _delete_oldest(Snapshot, snap.define, vm_snapshot_list, 'snapnames', task_id,
                               LOG_SNAPS_DELETE)
        elif action == 'PUT':
            snap.status = snap.OK
            snap.save_status()

            if result['meta']['apiview']['force']:
                # TODO: check indexes
                Snapshot.objects.filter(vm=vm, disk_id=snap.disk_id, id__gt=snap.id).delete()

            vm.revert_notready()
            result['message'] = 'Snapshot successfully restored'
        elif action == 'DELETE':
            snap.delete()
            result['message'] = 'Snapshot successfully deleted'
    else:
        _vm_snapshot_cb_failed(result, task_id, snap, action)  # Delete snapshot or update snapshot status
        logger.error('Found nonzero returncode in result from %s vm_snapshot(%s, %s). Error: %s',
                     action, vm_uuid, snap, msg)
        raise TaskException(result, 'Got bad return code (%s). Error: %s' % (result['returncode'], msg),
                            snap=snap)

    task_log_cb_success(result, task_id, vm=vm, **result['meta'])

    return result

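
# Hedged sketch of the retention idea used by _delete_oldest() above (defined elsewhere
# in the project): keep only the `retention` newest snapshots of a define, deleting the
# oldest ones first. The helper below is illustrative only, not the real API.
def _retention_overflow(snapshot_ids, retention):
    """Return ids to delete so that only the `retention` newest remain (ids grow over time)."""
    assert retention >= 1
    return sorted(snapshot_ids)[:-retention]


assert _retention_overflow([3, 1, 2, 5, 4], retention=2) == [1, 2, 3]
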
def node_sysinfo_cb(result, task_id, node_uuid=None):
    """
    A callback function for updating Node.json (sysinfo).

    node_uuid will be set only if called via API or GUI.
    """
    # In case the callback is called by restarting erigonesd:fast on a compute node, the meta dict lacks
    # a lot of information; msg is required as part of exception logging inside the callback decorator,
    # therefore we set it explicitly.
    result['meta']['msg'] = LOG_NODE_UPDATE

    if result['returncode'] != 0:
        logger.error('Found nonzero return code in result from esysinfo command on %s', node_uuid)
        raise TaskException(result, 'Got bad return code (%s)' % result['returncode'])

    stdout = result.pop('stdout', '')
    result.pop('stderr', None)
    node_new = False

    try:
        esysinfo = parse_esysinfo(stdout)
        img_sources = esysinfo.pop('img_sources')
        img_initial = esysinfo.pop('img_initial')
    except Exception as e:
        logger.error('Could not parse output from esysinfo command on %s. Error: %s', node_uuid, e)
        logger.exception(e)
        raise TaskException(result, 'Could not parse esysinfo output')
    else:
        uuid = esysinfo['sysinfo']['UUID']

        try:
            node = Node.objects.get(uuid=uuid)
        except Node.DoesNotExist:
            # The head node must be in online state during the admin DC initialization and each compute node
            # must be in online state during the ssh key exchange.
            node_new = True
            is_head = not Node.objects.exists()
            logger.warn('Creating NEW node from sysinfo output from %s', node_uuid)
            node = Node.create_from_sysinfo(uuid, esysinfo, status=Node.ONLINE, is_head=is_head)
            node_created.send(task_id, node=node)  # Signal!
            result['message'] = 'Successfully created new compute node %s' % node.hostname
            task_log_success(task_id, msg=LOG_NODE_CREATE, obj=node, task_result=result,
                             update_user_tasks=True)
            sshkey_changed = bool(node.sshkey)

            if node.is_head:
                logger.warn('New node %s is the first node ever created - assuming head node status. '
                            'Initializing mgmt system and creating admin DC', node)
                from api.system.init import init_mgmt

                try:
                    init_mgmt(node, images=img_initial)
                except Exception as e:
                    logger.exception(e)
                    result['message'] = 'Error while initializing admin datacenter (%s)' % e
                    task_log_error(task_id, msg=LOG_NODE_CREATE, obj=node, task_result=result,
                                   update_user_tasks=True)

            logger.info('Saving node %s IP address "%s" into admin network', node, node.ip_address)

            try:  # We should proceed even if the IP address is not registered
                node.ip_address.save()
            except Exception as e:
                logger.exception(e)
            else:
                admin_net = node.ip_address.subnet  # The network was updated by init_mgmt()
                # Reload the Subnet object because it is cached inside the node instance
                admin_net = admin_net.__class__.objects.get(pk=admin_net.pk)
                # We need a request object
                request = get_dummy_request(DefaultDc(), 'POST', system_user=True)
                record_cls = RecordView.Record

                if admin_net.dns_domain and admin_net.dns_domain == node.domain_name:
                    logger.info('Creating forward A DNS record for node %s', node)
                    # This will fail silently
                    RecordView.add_or_update_record(request, record_cls.A, admin_net.dns_domain,
                                                    node.hostname, node.address, task_id=task_id,
                                                    related_obj=node)

                if admin_net.ptr_domain:
                    logger.info('Creating reverse PTR DNS record for node %s', node)
                    # This will fail silently
                    RecordView.add_or_update_record(request, record_cls.PTR, admin_net.ptr_domain,
                                                    record_cls.get_reverse(node.address), node.hostname,
                                                    task_id=task_id, related_obj=node)
        else:
            sshkey_changed = node.sshkey_changed(esysinfo)

            if node.sysinfo_changed(esysinfo) or sshkey_changed:
                logger.warn('Updating node %s json with sysinfo output from %s', node, node_uuid)
                node.update_from_sysinfo(esysinfo)  # Will save public SSH key too
                node_json_changed.send(task_id, node=node)  # Signal!
                result['message'] = 'Successfully updated compute node %s' % node.hostname
                task_log_success(task_id, msg=LOG_NODE_UPDATE, obj=node, task_result=result,
                                 update_user_tasks=True)
            else:
                result['message'] = 'No changes detected on compute node %s' % node.hostname
                task_log_success(task_id, msg=LOG_NODE_UPDATE, obj=node, task_result=result,
                                 update_user_tasks=True)

        if sshkey_changed:
            logger.warn('SSH key has changed on node %s - creating authorized_keys synchronization tasks',
                        node)

            try:
                run_node_authorized_keys_sync()
            except Exception as e:
                logger.exception(e)

        try:
            run_node_img_sources_sync(node, node_img_sources=img_sources)
        except Exception as e:
            logger.exception(e)

        if node_new:
            node.del_initializing()
            # Used by esdc-ee to change node status to unlicensed
            node_status = getattr(settings, 'VMS_NODE_STATUS_DEFAULT', None)

            if node_status:
                node.save_status(node_status)  # Set node status (most probably to unlicensed)
        else:
            # Always run vm_status_all for an old compute node
            vm_status_all(task_id, node)

            # Sync snapshots and backups for every node storage
            try:
                NodeVmSnapshotList.sync(node)
            except Exception as e:
                logger.exception(e)

        return result

def vm_update_cb(result, task_id, vm_uuid=None, new_node_uuid=None):
    """
    A callback function for api.vm.base.views.vm_manage.
    """
    vm = Vm.objects.select_related('dc').get(uuid=vm_uuid)
    _vm_update_cb_done(result, task_id, vm)
    msg = result.get('message', '')
    force = result['meta']['apiview']['force']

    if result['returncode'] == 0 and (force or msg.find('Successfully updated') >= 0):
        json = result.pop('json', None)

        try:  # save json from smartos
            json_active = vm.json.load(json)
        except Exception as e:
            logger.exception(e)
            logger.error('Could not parse json output from PUT vm_manage(%s). Error: %s', vm_uuid, e)
            raise TaskException(result, 'Could not parse json output')

        vm_delete_snapshots_of_removed_disks(vm)  # Do this before updating json and json_active
        vm.json = json_active
        update_fields = ['enc_json', 'enc_json_active', 'changed']
        ignored_changed_vm_attrs = (
            'set_customer_metadata',
            'remove_customer_metadata',
            'create_timestamp',
            'boot_timestamp',
            'autoboot',
            'vnc_port',
            'update_disks',
        )

        if new_node_uuid:
            update_dict = vm.json_update()

            for i in ignored_changed_vm_attrs:
                update_dict.pop(i, None)

            if update_dict:
                raise TaskException(result, 'VM definition on compute node differs from definition in DB in '
                                            'following attributes: %s' % ','.join(update_dict.keys()))

            update_fields.append('node_id')

        old_json_active = vm.json_active
        vm.json_active = json_active

        if new_node_uuid:
            node = Node.objects.get(uuid=new_node_uuid)
            vm.set_node(node)

        with transaction.atomic():
            vm.save(update_node_resources=True, update_storage_resources=True, update_fields=update_fields)
            vm_update_ipaddress_usage(vm)

        vm_json_active_changed.send(task_id, vm=vm, old_json_active=old_json_active)  # Signal!

        if new_node_uuid:
            vm_node_changed.send(task_id, vm=vm, force_update=True)  # Signal!
            result['message'] = 'Node association successfully changed on VM %s' % vm.hostname

            if vm.json_changed():
                vm_update(vm)
    else:
        logger.error('Found nonzero returncode in result from PUT vm_manage(%s). Error: %s', vm_uuid, msg)
        raise TaskException(result, 'Got bad return code (%s). Error: %s' % (result['returncode'], msg))

    task_log_cb_success(result, task_id, vm=vm, **result['meta'])

    return result

def harvest_vm_cb(result, task_id, node_uuid=None):
    node = Node.objects.get(uuid=node_uuid)
    dc = Dc.objects.get_by_id(dc_id_from_task_id(task_id))
    err = result.pop('stderr', None)
    vms = []
    vms_err = []
    jsons = []
    rc = result.pop('returncode', None)  # Pop once; the popped value is reused in the error message below

    if rc != 0 or err:
        logger.error('Found nonzero returncode in result from harvest_vm(%s). Error: %s', node, err)
        raise TaskException(result, 'Got bad return code (%s). Error: %s' % (rc, err))

    for json in result.pop('stdout', '').split('||||'):
        json = json.strip()

        if json:
            try:
                jsons.append(PickleDict.load(json))
            except Exception as e:
                logger.error('Could not parse json output from harvest_vm(%s). Error: %s', node, e)
                raise TaskException(result, 'Could not parse json output')

    if not jsons:
        raise TaskException(result, 'Missing json output')

    request = get_dummy_request(dc, method='POST', system_user=True)

    for json in jsons:
        vm_uuid = json.get('uuid', None)  # Bad uuid will be stopped later in vm_from_json()

        if vm_uuid:
            if Vm.objects.filter(uuid=vm_uuid).exists():
                logger.warning('Ignoring VM %s found in harvest_vm(%s)', vm_uuid, node)
                continue

        try:
            vm = vm_from_json(request, task_id, json, dc, template=True, save=True, update_ips=True,
                              update_dns=True)
        except Exception as e:
            logger.exception(e)
            logger.error('Could not load VM from json:\n"""%s"""', json)
            err_msg = 'Could not load server %s. Error: %s' % (vm_uuid, e)
            task_log_cb_error({'message': err_msg}, task_id, obj=node, **result['meta'])
            vms_err.append(vm_uuid)
        else:
            logger.info('Successfully saved new VM %s after harvest_vm(%s)', vm, node)
            vms.append(vm.hostname)
            vm_deployed.send(task_id, vm=vm)  # Signal! (will update monitoring)

            if vm.json_changed():
                try:
                    _vm_update(vm)
                except Exception as e:
                    logger.exception(e)

    if vms or not vms_err:
        if vms:
            result['message'] = 'Successfully harvested %s server(s) (%s)' % (len(vms), ','.join(vms))
        else:
            result['message'] = 'No new server found'

        task_log_cb_success(result, task_id, obj=node, **result['meta'])
        return result
    else:
        raise TaskException(result, 'Could not find or load any server')

def inner(task_obj, result, task_id, *args, **kwargs):
    try:
        # Just in case; there will probably be no callback information at this point
        del result['meta']['callback']
    except KeyError:
        pass

    result['meta']['caller'] = task_id

    if meta_kwargs:
        result['meta'].update(meta_kwargs)

    # Issue #chili-512
    # + skip checking the parent task status when the task is being retried
    if check_parent_status and not task_obj.request.retries:
        cb_name = fun.__name__
        logger.debug('Waiting for parent task %s status, before running %s', task_id, cb_name)
        timer = 0

        while timer < TASK_PARENT_STATUS_MAXWAIT:
            ar = cq.AsyncResult(task_id)

            if ar.ready():
                logger.info('Parent task %s has finished with status=%s. Running %s',
                            task_id, ar.status, cb_name)
                break

            timer += 1
            logger.warning('Whoa! Parent task %s has not finished yet with status=%s. Waiting 1 second (%d), '
                           'before running %s', task_id, ar.status, timer, cb_name)
            sleep(1.0)
        else:
            logger.error('Task %s is not ready. Running %s anyway :(', task_id, cb_name)

    try:
        return fun(result, task_id, *args, **kwargs)
    except OPERATIONAL_ERRORS as exc:
        raise exc  # Caught by que.mgmt.MgmtCallbackTask
    except Exception as e:
        logger.exception(e)
        logger.error('Task %s failed', task_id)

        if not isinstance(e, TaskException):
            e = TaskException(result, str(e))

        if log_exception or update_user_tasks:
            if e.obj is None:
                try:
                    obj = get_vms_object(kwargs)
                except ObjectDoesNotExist:
                    obj = None
                    # noinspection PyProtectedMember
                    _UserTasksModel._tasks_del(task_id)  # Always remove user task
            else:
                obj = e.obj

            if log_exception:
                msg = e.result['meta'].get('msg', '')
                # Also removes the user task in task_log
                task_log(task_id, msg, obj=obj, task_status=states.FAILURE, task_result=e.result)
            elif obj:  # update_user_tasks
                obj.tasks_del(task_id)

        if error_fun:
            try:
                error_fun(result, task_id, task_exception=e, *args, **kwargs)
            except Exception as ex:
                logger.exception(ex)

        raise e
    finally:
        cb = UserCallback(task_id).load()

        if cb:
            logger.debug('Creating task for UserCallback[%s]: %s', task_id, cb)
            from api.task.tasks import task_user_callback_cb  # Circular import
            task_user_callback_cb.call(task_id, cb, **kwargs)

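
# Hedged sketch of how the decorator that produces inner() is presumably applied (the
# real registration lives elsewhere in the project; the decorator name and options
# below follow the surrounding conventions but are assumptions, not verified code):
#
#   @cq.task(bind=True, base=MgmtCallbackTask)
#   @callback(log_exception=True, check_parent_status=True)
#   def vm_migrate_cb(result, task_id, vm_uuid=None, slave_vm_uuid=None):
#       ...
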
def node_sysinfo_cb(result, task_id, node_uuid=None):
    """
    A callback function for updating Node.json (sysinfo).

    node_uuid will be set only if called via API or GUI.
    """
    # In case the callback is called by restarting erigonesd:fast on a compute node, the meta dict lacks
    # a lot of information; msg is required as part of exception logging inside the callback decorator,
    # therefore we set it explicitly.
    result['meta']['msg'] = LOG_NODE_UPDATE

    if result['returncode'] != 0:
        logger.error('Found nonzero return code in result from esysinfo command on %s', node_uuid)
        raise TaskException(result, 'Got bad return code (%s)' % result['returncode'])

    stdout = result.pop('stdout', '')
    result.pop('stderr', None)
    node_new = False

    try:
        esysinfo = parse_esysinfo(stdout)
        img_sources = esysinfo.pop('img_sources')
        img_initial = esysinfo.pop('img_initial')
    except Exception as e:
        logger.error('Could not parse output from esysinfo command on %s. Error: %s', node_uuid, e)
        logger.exception(e)
        raise TaskException(result, 'Could not parse esysinfo output')
    else:
        uuid = esysinfo['sysinfo']['UUID']

        try:
            node = Node.objects.get(uuid=uuid)
        except Node.DoesNotExist:
            # The head node must be in online state during the admin DC initialization and each compute node
            # must be in online state during the ssh key exchange.
            node_new = True
            is_head = not Node.objects.exists()
            logger.warn('Creating NEW node from sysinfo output from %s', node_uuid)
            node = Node.create_from_sysinfo(uuid, esysinfo, status=Node.ONLINE, is_head=is_head)
            node_created.send(task_id, node=node)  # Signal!
            result['message'] = 'Successfully created new compute node %s' % node.hostname
            task_log_success(task_id, msg=LOG_NODE_CREATE, obj=node, task_result=result,
                             update_user_tasks=True)
            sshkey_changed = bool(node.sshkey)

            if node.is_head:
                logger.warn('New node %s is the first node ever created - assuming head node status. '
                            'Initializing mgmt system and creating admin DC', node)
                from api.system.init import init_mgmt

                try:
                    init_mgmt(node, images=img_initial)
                except Exception as e:
                    logger.exception(e)
                    result['message'] = 'Error while initializing admin datacenter (%s)' % e
                    task_log_error(task_id, msg=LOG_NODE_CREATE, obj=node, task_result=result,
                                   update_user_tasks=True)

            try:
                _save_node_ip_address(task_id, node)
            except Exception as e:
                logger.exception(e)
        else:
            sshkey_changed = node.sshkey_changed(esysinfo)
            sysinfo_changed = node.sysinfo_changed(esysinfo)

            if sysinfo_changed or sshkey_changed:
                logger.warn('Updating node %s json with sysinfo output from %s', node, node_uuid)
                node.update_from_sysinfo(esysinfo)  # Will save public SSH key too
                node_json_changed.send(task_id, node=node)  # Signal!
                result['message'] = 'Successfully updated compute node %s' % node.hostname
            else:
                node_json_unchanged.send(task_id, node=node)  # Signal!
                result['message'] = 'No changes detected on compute node %s' % node.hostname

            task_log_success(task_id, msg=LOG_NODE_UPDATE, obj=node, task_result=result,
                             update_user_tasks=True)

        if sshkey_changed:
            logger.warn('SSH key has changed on node %s - creating authorized_keys synchronization tasks',
                        node)

            try:
                run_node_authorized_keys_sync()
            except Exception as e:
                logger.exception(e)

        try:
            run_node_img_sources_sync(node, node_img_sources=img_sources)
        except Exception as e:
            logger.exception(e)

        if node_new:
            node.del_initializing()
            # Used by esdc-ee to change node status to unlicensed
            node_status = getattr(settings, 'VMS_NODE_STATUS_DEFAULT', None)

            if node_status:
                node.save_status(node_status)  # Set node status (most probably to unlicensed)
        else:
            # Always run vm_status_all for an old compute node
            vm_status_all(task_id, node)

            # Sync snapshots and backups for every node storage
            try:
                NodeVmSnapshotList.sync(node)
            except Exception as e:
                logger.exception(e)

        # Refresh cached version information + emit an event informing about the restarted erigonesd:fast
        try:
            del node.system_version

            # Sometimes the node worker does not respond within the given timeout, so we try more than once
            for i in range(5):
                if node.system_version:
                    break

            logger.info('Node %s has system version %s', node, node.system_version)

            if owner_id_from_task_id(task_id) == TASK_USER:  # internal user ID
                NodeSystemRestarted(node, system_version=node.system_version).send()
        except Exception as e:
            logger.exception(e)

        return result