def _delete_oldest(model, define, view_function, view_item, task_id, msg):
    """
    Helper for finding oldest snapshots/backups and running DELETE view_function().

    @type model: django.db.models.Model
    """
    vm = define.vm
    # TODO: check indexes
    # noinspection PyUnresolvedReferences
    total = model.objects.filter(vm=vm, disk_id=define.disk_id, define=define, status=model.OK).count()
    to_delete = total - define.retention

    if to_delete < 1:
        return None

    # List of snapshot or backup names to delete
    # TODO: check indexes
    # noinspection PyUnresolvedReferences
    oldest = model.objects.filter(vm=vm, disk_id=define.disk_id, define=define, status=model.OK)\
                          .values_list('name', flat=True).order_by('id')[:to_delete]
    view_name = view_function.__name__
    view_data = {'disk_id': define.array_disk_id, view_item: tuple(oldest)}
    request = get_dummy_request(vm.dc, method='DELETE', system_user=True)
    request.define_id = define.id  # Automatic task
    # Go!
    logger.info('Running DELETE %s(%s, %s), because %s>%s', view_name, vm, view_data, total, define.retention)
    res = call_api_view(request, 'DELETE', view_function, vm.hostname, data=view_data)

    if res.status_code in (200, 201):
        logger.warning('DELETE %s(%s, %s) was successful: %s', view_name, vm, view_data, res.data)
    else:
        logger.error('Running DELETE %s(%s, %s) failed: %s (%s): %s',
                     view_name, vm, view_data, res.status_code, res.status_text, res.data)
        MonitoringBackend.vm_send_alert(vm, 'Automatic deletion of old %ss %s/disk-%s failed to start.' %
                                        (model.__name__.lower(), vm.hostname, define.array_disk_id))
        # Need to log this, because nobody else does (+ there is no PENDING task)
        detail = 'hostname=%s, %s=%s, disk_id=%s, Error: %s' % (vm.hostname, view_item, ','.join(oldest),
                                                                define.array_disk_id,
                                                                get_task_error_message(res.data))
        task_log_error(task_id, msg, vm=vm, detail=detail, update_user_tasks=False)

    return res

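# Illustrative sketch (not part of the original module): how a retention beat task might call
# _delete_oldest() to trim old automatic snapshots. The Snapshot model and the vm_snapshot_list
# view appear elsewhere in this codebase; the 'snapnames' view_item key and the LOG_SNAP_DELETE
# message constant are assumptions used here only for illustration.
#
#   from api.vm.snapshot.views import vm_snapshot_list
#
#   _delete_oldest(Snapshot, snap_define, vm_snapshot_list, 'snapnames',
#                  task_id_from_task_id(vm_snapshot_beat.request.id, dc_id=vm.dc.id),
#                  LOG_SNAP_DELETE)
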
def _vm_update(vm):
    """Helper for running PUT vm_manage() on a VM whose json has changed"""
    logger.info('Running PUT vm_manage(%s)', vm)
    from api.vm.base.views import vm_manage

    request = get_dummy_request(vm.dc, method='PUT', system_user=True)
    res = call_api_view(request, 'PUT', vm_manage, vm.hostname)

    if res.status_code == 201:
        logger.info('PUT vm_manage(%s) was successful: %s', vm, res.data)
    else:
        logger.error('PUT vm_manage(%s) failed: %s (%s): %s', vm, res.status_code, res.status_text, res.data)

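# Usage sketch: _vm_update() is invoked from harvest_vm_cb() below whenever a harvested
# server's current configuration no longer matches its saved json:
#
#   if vm.json_changed():
#       _vm_update(vm)
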
def vm_snapshot_beat(snap_define_id):
    """
    This is a periodic beat task. Run POST vm_snapshot according to the snapshot definition.
    """
    from api.vm.snapshot.views import vm_snapshot

    snap_define = SnapshotDefine.objects.get(id=snap_define_id)
    snap_name = snap_define.generate_snapshot_name()
    vm = snap_define.vm
    disk_id = snap_define.array_disk_id
    request = get_dummy_request(vm.dc, method='POST', system_user=True)
    request.define_id = snap_define.id  # Automatic task
    # Go!
    res = call_api_view(request, 'POST', vm_snapshot, vm.hostname, snap_name,
                        data={'disk_id': disk_id, 'fsfreeze': snap_define.fsfreeze})

    if res.status_code == 201:
        logger.info('POST vm_snapshot(%s, %s, {disk_id=%s}) was successful: %s', vm, snap_name, disk_id, res.data)
    else:
        # Need to log this, because nobody else does (+ there is no PENDING task)
        detail = 'snapname=%s, disk_id=%s, type=%s. Error: %s' % (snap_name, disk_id, Snapshot.AUTO,
                                                                  get_task_error_message(res.data))
        task_log_error(task_id_from_task_id(vm_snapshot_beat.request.id, dc_id=vm.dc.id), LOG_SNAP_CREATE,
                       vm=vm, detail=detail, update_user_tasks=False)

        if res.status_code == HTTP_423_LOCKED:
            logger.warning('Running POST vm_snapshot(%s, %s, {disk_id=%s}) failed: %s (%s): %s',
                           vm, snap_name, disk_id, res.status_code, res.status_text, res.data)
        else:
            logger.error('Running POST vm_snapshot(%s, %s, {disk_id=%s}) failed: %s (%s): %s',
                         vm, snap_name, disk_id, res.status_code, res.status_text, res.data)
            MonitoringBackend.vm_send_alert(vm, 'Automatic snapshot %s/disk-%s@%s failed to start.' %
                                            (vm.hostname, disk_id, snap_define.name))

def vm_backup_beat(bkp_define_id):
    """
    This is a periodic beat task. Run POST vm_backup according to the backup definition.
    """
    from api.vm.backup.views import vm_backup

    bkp_define = BackupDefine.objects.get(id=bkp_define_id)
    vm = bkp_define.vm
    disk_id = bkp_define.array_disk_id
    defname = bkp_define.name
    request = get_dummy_request(vm.dc, method='POST', system_user=True)
    request.define_id = bkp_define.id  # Automatic task
    # Go!
    res = call_api_view(request, 'POST', vm_backup, vm.hostname, defname,
                        data={'disk_id': disk_id, 'fsfreeze': bkp_define.fsfreeze})

    if res.status_code == 201:
        logger.info('POST vm_backup(%s, %s, {disk_id=%s}) was successful: %s', vm, defname, disk_id, res.data)
    else:
        # Need to log this, because nobody else does (+ there is no PENDING task)
        detail = 'hostname=%s, bkpname=%s, disk_id=%s, Error: %s' % (vm.hostname, bkp_define.generate_backup_name(),
                                                                     disk_id, get_task_error_message(res.data))
        task_log_error(task_id_from_task_id(vm_backup_beat.request.id, dc_id=vm.dc.id), LOG_BKP_CREATE,
                       vm=vm, detail=detail, update_user_tasks=False)

        if res.status_code == HTTP_423_LOCKED:
            logger.warning('Running POST vm_backup(%s, %s, {disk_id=%s}) failed: %s (%s): %s',
                           vm, defname, disk_id, res.status_code, res.status_text, res.data)
        else:
            logger.error('Running POST vm_backup(%s, %s, {disk_id=%s}) failed: %s (%s): %s',
                         vm, defname, disk_id, res.status_code, res.status_text, res.data)
            Zabbix.vm_send_alert(vm, 'Automatic backup %s/disk-%s@%s failed to start.' %
                                 (vm.hostname, disk_id, defname))

def vm_update(vm):
    """
    Internal API call used for updating a VM if changes were detected in its json.
    """
    logger.info('Running PUT vm_manage(%s), because something (vnc port?) has changed', vm)
    from api.vm.base.views import vm_manage
    from api.utils.request import get_dummy_request
    from api.utils.views import call_api_view

    request = get_dummy_request(vm.dc, method='PUT', system_user=True)
    res = call_api_view(request, 'PUT', vm_manage, vm.hostname)

    if res.status_code == 201:
        logger.warning('PUT vm_manage(%s) was successful: %s', vm, res.data)
    else:
        logger.error('PUT vm_manage(%s) failed: %s (%s): %s', vm, res.status_code, res.status_text, res.data)

def _save_node_ip_address(task_id, node):
    """Helper function for saving the IP address and creating DNS records of a new compute node"""
    assert node.address

    try:
        ip_address = node.create_ip_address()
    except IPAddress.DoesNotExist as exc:
        logger.warning('Could not save node %s IP address "%s" into admin network (%s)', node, node.address, exc)
        return

    logger.info('Saving node %s IP address "%s" into admin network', node, node.ip_address)
    ip_address.save()

    admin_net = node.ip_address.subnet  # The network was updated by init_mgmt()
    # Reload Subnet object because it is cached inside the node instance
    admin_net = admin_net.__class__.objects.get(pk=admin_net.pk)
    # We need a request object
    request = get_dummy_request(DefaultDc(), 'POST', system_user=True)
    record_cls = RecordView.Record

    if admin_net.dns_domain and admin_net.dns_domain == node.domain_name:
        logger.info('Creating forward A DNS record for node %s', node)
        # This will fail silently
        RecordView.add_or_update_record(request, record_cls.A, admin_net.dns_domain, node.hostname, node.address,
                                        task_id=task_id, related_obj=node)

    if admin_net.ptr_domain:
        logger.info('Creating reverse PTR DNS record for node %s', node)
        # This will fail silently
        RecordView.add_or_update_record(request, record_cls.PTR, admin_net.ptr_domain,
                                        record_cls.get_reverse(node.address), node.hostname,
                                        task_id=task_id, related_obj=node)

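# Usage sketch (hypothetical): for a freshly created compute node this helper would be called
# from the sysinfo callback, e.g.:
#
#   _save_node_ip_address(task_id, node)
#
# node_sysinfo_cb() below currently performs the same IP/DNS registration inline for new nodes.
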
def sync(cls, node):
    """Run put() for all node storages on a compute node"""
    request = get_dummy_request(DefaultDc(), method='PUT', system_user=True)
    data = {}
    result = {}

    for ns in node.nodestorage_set.all():
        view = cls(request, ns, data)
        result[ns] = tid, err = view.put(internal=True)

        if err:
            logger.error('Failed to create node_vm_snapshot_sync task for %s@%s. Error: %s',
                         ns.zpool, node.hostname, err)
        else:
            logger.info('Created node_vm_snapshot_sync task %s for %s@%s', tid, ns.zpool, node.hostname)

    return result

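# Usage sketch, assuming sync() above is the NodeVmSnapshotList classmethod referenced in
# node_sysinfo_cb() below, which triggers the synchronization for every node storage of a
# (re)discovered compute node:
#
#   try:
#       NodeVmSnapshotList.sync(node)
#   except Exception as e:
#       logger.exception(e)
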
def vm_replica_failover_cb(result, task_id, vm_uuid=None, slave_vm_uuid=None):
    """
    A callback function for api.vm.replica.views.vm_replica_failover.
    """
    slave_vm = SlaveVm.get_by_uuid(slave_vm_uuid, sr=('vm', 'master_vm', 'vm__node', 'vm__dc'))
    vm = slave_vm.master_vm
    assert vm.uuid == vm_uuid
    action = result['meta']['apiview']['method']
    force = result['meta']['apiview']['force']
    result, jsons = _parse_vm_replica_result(result, vm, slave_vm, action, key_json_idx=-1,
                                             cb_name='vm_replica_failover')
    sync_status = _save_svc_state(slave_vm, jsons)

    if result['returncode'] != 0:
        if sync_status is not None:
            slave_vm.save(update_fields=('sync_status',))

        vm.revert_notready()
        msg = result['detail']
        logger.error('Found nonzero returncode in result from %s vm_replica_failover(%s, %s). Error: %s',
                     action, vm_uuid, slave_vm_uuid, msg)
        errmsg = _update_task_result_failure(result, msg)
        raise TaskException(result, errmsg)

    # New master VM was born
    # Delete tasks for the old master
    if force:
        tasks = list(vm.tasks.keys())

        try:
            tasks.remove(task_id)
        except ValueError:
            pass

        _delete_tasks(vm, tasks)

    # Create internal shutdown task of the old master VM
    old_vm_status = result['meta']['apiview']['orig_status']
    _vm_shutdown(vm)

    # Save new master, degrade old master
    slave_vm.master_vm.revert_notready(save=False)
    new_vm = slave_vm.fail_over()

    # Re-check status of the old master (current degraded slave) because it was shut down,
    # but the state wasn't saved (it was notready back then)
    vm_status_one(task_id, vm)

    # Continue with promoting the new master and degradation of the old one
    SlaveVm.switch_vm_snapshots_node_storages(new_vm, nss=vm.get_node_storages())
    # Force update of zabbix
    vm_json_active_changed.send(task_id, vm=new_vm, old_json_active={}, force_update=True)  # Signal!

    if new_vm.node != vm.node:
        vm_node_changed.send(task_id, vm=new_vm, force_update=True)  # Signal!

    msg = 'Server replica was successfully promoted to master'
    _update_task_result_success(result, slave_vm, action, msg)
    task_log_cb_success(result, task_id, vm=new_vm, **result['meta'])
    request = get_dummy_request(vm.dc, method='PUT', system_user=True)

    # Mark pending backups as "lost" :(  TODO: implement vm_backup_sync
    new_vm.backup_set.filter(status=Backup.PENDING).update(status=Backup.LOST)

    # Sync snapshots on the new master VM (mark missing snapshots as "lost")
    for disk_id, _ in enumerate(new_vm.json_active_get_disks(), start=1):
        call_api_view(request, 'PUT', vm_snapshot_list, new_vm.hostname, data={'disk_id': disk_id},
                      log_response=True)

    if old_vm_status == Vm.RUNNING:
        # Start the new master VM
        call_api_view(request, 'PUT', vm_status, new_vm.hostname, action='start', log_response=True)

    return result

def vm_create_cb(result, task_id, vm_uuid=None):
    """
    A callback function for api.vm.base.views.vm_manage.
    """
    vm = Vm.objects.select_related('dc').get(uuid=vm_uuid)
    msg = result.get('message', '')

    if result['returncode'] == 0 and msg.find('Successfully created') >= 0:
        json = result.pop('json', None)

        try:  # save json from smartos
            json_active = vm.json.load(json)
            vm.json_active = json_active
            vm.json = json_active

            if result['meta']['apiview']['recreate']:
                Snapshot.objects.filter(vm=vm).delete()
                SnapshotDefine.objects.filter(vm=vm).delete()
                BackupDefine.objects.filter(vm=vm).delete()
                vm.save_metadata('installed', False, save=False)

        except Exception as e:
            logger.error('Could not parse json output from POST vm_manage(%s). Error: %s', vm_uuid, e)
            _vm_error(task_id, vm)
            logger.exception(e)
            raise TaskException(result, 'Could not parse json output')
        else:
            # save all
            vm.save(update_node_resources=True, update_storage_resources=True)
            vm_update_ipaddress_usage(vm)
            # vm_json_active_changed.send(task_id, vm=vm)  # Signal! -> not needed, vm_deployed is called below
            vm_created.send(task_id, vm=vm)  # Signal!

            if msg.find('Successfully started') < 0:  # VM was created, but could not be started
                logger.error('VM %s was created, but could not be started! Error: %s', vm_uuid, msg)
                _vm_error(task_id, vm)
                raise TaskException(result, 'Initial start failed (%s)' % msg)

            sendmail(vm.owner, 'vm/base/vm_create_subject.txt', 'vm/base/vm_create_email.txt',
                     extra_context={'vm': vm}, user_i18n=True, dc=vm.dc, fail_silently=True)

    else:
        logger.error('Found nonzero returncode in result from POST vm_manage(%s). Error: %s', vm_uuid, msg)
        # Revert status and inform user
        _vm_create_cb_failed(result, task_id, vm)

        if result['meta']['apiview']['recreate'] and msg.find('Successfully deleted') >= 0:
            _vm_error(task_id, vm)  # Something went terribly wrong

        # and FAIL this task
        raise TaskException(result, 'Got bad return code (%s). Error: %s' % (result['returncode'], msg))

    # So far so good. Now wait for deploy_over in vm_status_event_cb
    logger.info('VM %s is waiting for deploy_over...', vm_uuid)
    timer = 0
    repeat = 0

    while not vm.has_deploy_finished():
        if timer > VMS_VM_DEPLOY_TOOLONG:  # 10 minutes is too long
            if repeat == VMS_VM_DEPLOY_TOOLONG_MAX_CYCLES:  # 20 minutes is really too long
                logger.error('VM %s deploy process has timed out!', vm_uuid)
                _vm_error(task_id, vm)
                result['message'] = 'VM %s deploy has timed out' % vm.hostname
                task_log_cb_error(result, task_id, vm=vm, **result['meta'])
                return result

            repeat += 1
            timer = 0
            logger.error('VM %s takes too long to deploy. Sending force stop/start', vm_uuid)
            # noinspection PyUnusedLocal
            tid, err = vm_reset(vm)

        sleep(3.0)
        timer += 3

    logger.info('VM %s is completely deployed!', vm_uuid)
    internal_metadata = vm.json.get('internal_metadata', {}).copy()  # save internal_metadata for email
    vm = Vm.objects.select_related('dc', 'template').get(pk=vm.pk)  # Reload vm
    vm_deployed.send(task_id, vm=vm)  # Signal!
    sendmail(vm.owner, 'vm/base/vm_deploy_subject.txt', 'vm/base/vm_deploy_email.txt', fail_silently=True,
             extra_context={'vm': vm, 'internal_metadata': internal_metadata}, user_i18n=True, dc=vm.dc)

    try:
        result['message'] = '\n'.join(result['message'].strip().split('\n')[:-1])  # Remove "started" stuff
    except Exception as e:
        logger.exception(e)

    task_log_cb_success(result, task_id, vm=vm, **result['meta'])

    try:
        if vm.template:  # Try to create snapshot/backup definitions defined by the template
            vm_define_snapshot, vm_define_backup = vm.template.vm_define_snapshot, vm.template.vm_define_backup

            if vm_define_snapshot or vm_define_backup:
                user = User.objects.get(id=user_id_from_task_id(task_id))
                request = get_dummy_request(vm.dc, method='POST', user=user)
                SnapshotDefineView.create_from_template(request, vm, vm_define_snapshot, log=logger)
                BackupDefineView.create_from_template(request, vm, vm_define_backup, log=logger)
    except Exception as e:
        logger.exception(e)

    return result

def node_sysinfo_cb(result, task_id, node_uuid=None):
    """
    A callback function for updating Node.json (sysinfo).

    node_uuid will be set only if called via API or GUI.
    """
    # In case the callback is called by restarting the erigonesd:fast service on the compute node, the meta dict
    # lacks a lot of information; msg is required as part of exception logging inside the callback decorator,
    # therefore we set it explicitly.
    result['meta']['msg'] = LOG_NODE_UPDATE

    if result['returncode'] != 0:
        logger.error('Found nonzero return code in result from esysinfo command on %s', node_uuid)
        raise TaskException(result, 'Got bad return code (%s)' % result['returncode'])

    stdout = result.pop('stdout', '')
    result.pop('stderr', None)
    node_new = False

    try:
        esysinfo = parse_esysinfo(stdout)
        img_sources = esysinfo.pop('img_sources')
        img_initial = esysinfo.pop('img_initial')
    except Exception as e:
        logger.error('Could not parse output from esysinfo command on %s. Error: %s', node_uuid, e)
        logger.exception(e)
        raise TaskException(result, 'Could not parse esysinfo output')
    else:
        uuid = esysinfo['sysinfo']['UUID']

        try:
            node = Node.objects.get(uuid=uuid)
        except Node.DoesNotExist:
            # The head node must be in online state during the admin DC initialization and each compute node
            # must be in online state during ssh key exchange.
            node_new = True
            is_head = not Node.objects.exists()
            logger.warning('Creating NEW node from sysinfo output from %s', node_uuid)
            node = Node.create_from_sysinfo(uuid, esysinfo, status=Node.ONLINE, is_head=is_head)
            node_created.send(task_id, node=node)  # Signal!
            result['message'] = 'Successfully created new compute node %s' % node.hostname
            task_log_success(task_id, msg=LOG_NODE_CREATE, obj=node, task_result=result, update_user_tasks=True)
            sshkey_changed = bool(node.sshkey)

            if node.is_head:
                logger.warning('New node %s is the first node ever created - assuming head node status. '
                               'Initializing mgmt system and creating admin DC', node)
                from api.system.init import init_mgmt

                try:
                    init_mgmt(node, images=img_initial)
                except Exception as e:
                    logger.exception(e)
                    result['message'] = 'Error while initializing admin datacenter (%s)' % e
                    task_log_error(task_id, msg=LOG_NODE_CREATE, obj=node, task_result=result,
                                   update_user_tasks=True)

            logger.info('Saving node %s IP address "%s" into admin network', node, node.ip_address)

            try:  # We should proceed even if the IP address is not registered
                node.ip_address.save()
            except Exception as e:
                logger.exception(e)
            else:
                admin_net = node.ip_address.subnet  # The network was updated by init_mgmt()
                # Reload Subnet object because it is cached inside the node instance
                admin_net = admin_net.__class__.objects.get(pk=admin_net.pk)
                # We need a request object
                request = get_dummy_request(DefaultDc(), 'POST', system_user=True)
                record_cls = RecordView.Record

                if admin_net.dns_domain and admin_net.dns_domain == node.domain_name:
                    logger.info('Creating forward A DNS record for node %s', node)
                    # This will fail silently
                    RecordView.add_or_update_record(request, record_cls.A, admin_net.dns_domain, node.hostname,
                                                    node.address, task_id=task_id, related_obj=node)

                if admin_net.ptr_domain:
                    logger.info('Creating reverse PTR DNS record for node %s', node)
                    # This will fail silently
                    RecordView.add_or_update_record(request, record_cls.PTR, admin_net.ptr_domain,
                                                    record_cls.get_reverse(node.address), node.hostname,
                                                    task_id=task_id, related_obj=node)
        else:
            sshkey_changed = node.sshkey_changed(esysinfo)

            if node.sysinfo_changed(esysinfo) or sshkey_changed:
                logger.warning('Updating node %s json with sysinfo output from %s', node, node_uuid)
                node.update_from_sysinfo(esysinfo)  # Will save public SSH key too
                node_json_changed.send(task_id, node=node)  # Signal!
                result['message'] = 'Successfully updated compute node %s' % node.hostname
                task_log_success(task_id, msg=LOG_NODE_UPDATE, obj=node, task_result=result,
                                 update_user_tasks=True)
            else:
                result['message'] = 'No changes detected on compute node %s' % node.hostname
                task_log_success(task_id, msg=LOG_NODE_UPDATE, obj=node, task_result=result,
                                 update_user_tasks=True)

        if sshkey_changed:
            logger.warning('SSH key has changed on node %s - creating authorized_keys synchronization tasks',
                           node)

            try:
                run_node_authorized_keys_sync()
            except Exception as e:
                logger.exception(e)

        try:
            run_node_img_sources_sync(node, node_img_sources=img_sources)
        except Exception as e:
            logger.exception(e)

        if node_new:
            node.del_initializing()
            # Used by esdc-ee to change node status to unlicensed
            node_status = getattr(settings, 'VMS_NODE_STATUS_DEFAULT', None)

            if node_status:
                node.save_status(node_status)  # Set node status (most probably to unlicensed)
        else:
            # Always run vm_status_all for an old compute node
            vm_status_all(task_id, node)

        # Sync snapshots and backups for every node storage
        try:
            NodeVmSnapshotList.sync(node)
        except Exception as e:
            logger.exception(e)

    return result

def init_mgmt(head_node, images=None):
    """
    Initialize the system and create the "admin" datacenter.
    """
    from api.dc.views import dc_node, dc_settings, dc_domain
    from api.network.base.views import net_manage
    from api.dns.domain.views import dns_domain
    from api.node.vm.views import harvest_vm

    admin = settings.VMS_DC_ADMIN
    main = settings.VMS_DC_MAIN
    # Admin DC and default DC should already exist (initial_data)
    admin_dc = Dc.objects.get_by_name(admin)
    default_dc = Dc.objects.get_by_name(main)
    # We need some request with admin DC - important for all subsequent commands
    request = get_dummy_request(admin_dc, method='POST', system_user=True)
    # All api calls will use the POST method...
    api_post = partial(_api_cmd, request, 'POST')
    # ...except net_manage, dns_record and dc_settings
    api_put = partial(_api_cmd, request, 'PUT')

    # Initialize images
    if images and isinstance(images, list):
        logger.warning('Initializing %d images', len(images))
        _init_images(head_node, images, default_dc, admin_dc)
    else:
        logger.error('Could not parse initial images or empty initial images')

    # Create DNS zone for the domain set during head node installation
    try:
        admin_zone = head_node.domain_name

        if admin_zone:
            api_post(dns_domain, admin_zone, owner=settings.ADMIN_USERNAME, dc_bound=False)
    except Exception as e:
        admin_zone = None
        logger.exception(e)

    # Setup miscellaneous stuff depending on admin network info
    try:
        mgmt_ifconfig = get_local_netinfo()
        mgmt_ip = mgmt_ifconfig['addr']

        try:
            mgmt_net = ipaddress.ip_network(u'%(network)s/%(netmask)s' % mgmt_ifconfig)
        except Exception as exc:
            logger.exception(exc)
        else:
            try:
                # Create reverse dns domain
                ptr_zone = reverse_domain_from_network(mgmt_net)
                api_post(dns_domain, ptr_zone, owner=settings.ADMIN_USERNAME, dc_bound=False)
                api_post(dc_domain, ptr_zone, dc=main)
                api_post(dc_domain, ptr_zone, dc=admin)
            except Exception as exc:
                logger.exception(exc)
            else:
                # Set PTR zone for admin network
                mgmt_ifconfig['ptr_domain'] = ptr_zone

        # Change admin network subnet according to ip/netmask/gw on this machine (mgmt01.local)
        api_put(net_manage, settings.VMS_NET_ADMIN, dns_domain=admin_zone, **mgmt_ifconfig)

        # Change SITE_LINK and SITE_SIGNATURE in both datacenters (#549, #551)
        site_link = 'https://%s' % mgmt_ip
        site_signature = settings.SITE_SIGNATURE.replace(settings.SITE_LINK, site_link)
        api_put(dc_settings, main, SITE_LINK=site_link, SITE_SIGNATURE=site_signature)
        api_put(dc_settings, admin, SITE_LINK=site_link, SITE_SIGNATURE=site_signature)
        _es_api_url(site_link)
    except Exception as e:
        logger.exception(e)

    # Add head node + all its storages into admin DC
    api_post(dc_node, head_node.hostname, strategy=DcNode.SHARED, add_storage=9, dc=admin)
    logger.warning('Admin datacenter "%s" was successfully initialized', admin_dc)

    # Harvest all VMs from the head node into admin DC
    while True:
        ret = api_post(harvest_vm, head_node.hostname, dc=admin)

        if status.is_success(ret.status_code):
            logger.info('POST harvest_vm(%s) has started: %s', head_node.hostname, ret.data)
            break
        else:
            logger.error('POST harvest_vm(%s) has failed; retrying in 3 seconds', head_node.hostname)
            sleep(3)

    # The harvest performs some other tasks asynchronously, during which the node must stay in the online state.
    # So let's sleep for some time to give those tasks some breathing space.
    logger.info('Sleeping for 60 seconds after admin datacenter initialization')
    sleep(60)

    # Let's update the default image server after we've harvested the VMS_VM_IMG01
    try:
        if Vm.objects.filter(uuid=settings.VMS_VM_IMG01).exists():
            vm_img01_uuid = settings.VMS_VM_IMG01
        else:
            vm_img01_uuid = None

        if settings.VMS_IMAGE_VM == vm_img01_uuid:
            logger.info('The current image server (VMS_IMAGE_VM) is already set to %s', vm_img01_uuid)
        else:
            api_put(dc_settings, main, VMS_IMAGE_VM=vm_img01_uuid)
    except Exception as e:
        logger.exception(e)

    # We can change the default resolvers after we've harvested the VMS_VM_DNS01 (#chili-831)
    try:
        try:
            vm_dns01_ip = Vm.objects.get(uuid=settings.VMS_VM_DNS01).ips[0]
        except Vm.DoesNotExist:
            logger.warning('DNS VM (%s) not found - using default DNS servers', settings.VMS_VM_DNS01)
        else:
            api_put(dc_settings, main, VMS_VM_RESOLVERS_DEFAULT=[vm_dns01_ip])
            api_put(dc_settings, admin, VMS_VM_RESOLVERS_DEFAULT=[vm_dns01_ip])
    except Exception as e:
        logger.exception(e)

    return ret

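# Minimal sketch of the _api_cmd wrapper assumed by the api_post/api_put partials above.
# _api_cmd is not shown in this module; this is an assumption based on how call_api_view()
# is used elsewhere in this file, not the actual implementation.
#
#   def _api_cmd(request, method, view, resource, **params):
#       # Run an internal API view call with the given data and log the response status
#       res = call_api_view(request, method, view, resource, data=params)
#       logger.info('%s %s(%s, %s) -> %s', method, view.__name__, resource, params, res.status_code)
#       return res
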
def vm_migrate_cb(result, task_id, vm_uuid=None, slave_vm_uuid=None):
    """
    A callback function for api.vm.migrate.views.vm_migrate.
    """
    ghost_vm = SlaveVm.get_by_uuid(slave_vm_uuid)
    msg = result.get('message', '')

    if result['returncode'] == 0 and msg and 'Successfully migrated' in msg:
        # Save node and delete the placeholder VM first
        node = ghost_vm.vm.node
        nss = set(ghost_vm.vm.get_node_storages())
        ghost_vm.delete()  # post_delete signal will update node and storage resources
        # Fetch VM after ghost_vm is deleted, because it updates the vm.slave_vms array
        vm = Vm.objects.select_related('node', 'dc').get(uuid=vm_uuid)
        changing_node = vm.node != ghost_vm.vm.node
        json = result.pop('json', None)

        try:  # save json from smartos
            json_active = vm.json.load(json)
            vm.json_active = json_active
            vm.json = json_active
        except Exception as e:
            logger.exception(e)
            logger.error('Could not parse json output from vm_migrate(%s). Error: %s', vm_uuid, e)
            raise TaskException(result, 'Could not parse json output')

        nss.update(list(vm.get_node_storages()))
        # Revert status and set new node (should trigger node resource update)
        vm.revert_notready(save=False)

        if changing_node:
            vm.set_node(node)

        vm.save(update_node_resources=True, update_storage_resources=nss)
        SlaveVm.switch_vm_snapshots_node_storages(vm, nss=nss)
        vm_node_changed.send(task_id, vm=vm, force_update=True)  # Signal!
    else:
        vm = Vm.objects.get(uuid=vm_uuid)
        _vm_migrate_cb_failed(result, task_id, vm, ghost_vm)
        logger.error('Found nonzero returncode in result from vm_migrate(%s). Error: %s', vm_uuid, msg)
        raise TaskException(result, 'Got bad return code (%s). Error: %s' % (result['returncode'], msg))

    task_log_cb_success(result, task_id, vm=vm, **result['meta'])

    if vm.json_changed():
        logger.info('Running PUT vm_manage(%s), because something (vnc port?) has changed', vm)
        from api.vm.base.views import vm_manage
        from api.utils.request import get_dummy_request
        from api.utils.views import call_api_view

        request = get_dummy_request(vm.dc, method='PUT', system_user=True)
        res = call_api_view(request, 'PUT', vm_manage, vm.hostname)

        if res.status_code == 201:
            logger.warning('PUT vm_manage(%s) was successful: %s', vm, res.data)
        else:
            logger.error('PUT vm_manage(%s) failed: %s (%s): %s', vm, res.status_code, res.status_text, res.data)

    return result

def harvest_vm_cb(result, task_id, node_uuid=None):
    """
    A callback function for api.node.vm.views.harvest_vm.
    """
    node = Node.objects.get(uuid=node_uuid)
    dc = Dc.objects.get_by_id(dc_id_from_task_id(task_id))
    err = result.pop('stderr', None)
    vms = []
    vms_err = []
    jsons = []

    if result.pop('returncode', None) != 0 or err:
        logger.error('Found nonzero returncode in result from harvest_vm(%s). Error: %s', node, err)
        raise TaskException(result, 'Got bad return code (%s). Error: %s' % (result['returncode'], err))

    for json in result.pop('stdout', '').split('||||'):
        json = json.strip()

        if json:
            try:
                jsons.append(PickleDict.load(json))
            except Exception as e:
                logger.error('Could not parse json output from harvest_vm(%s). Error: %s', node, e)
                raise TaskException(result, 'Could not parse json output')

    if not jsons:
        raise TaskException(result, 'Missing json output')

    request = get_dummy_request(dc, method='POST', system_user=True)

    for json in jsons:
        vm_uuid = json.get('uuid', None)  # A bad uuid will be rejected later in vm_from_json()

        if vm_uuid:
            if Vm.objects.filter(uuid=vm_uuid).exists():
                logger.warning('Ignoring VM %s found in harvest_vm(%s)', vm_uuid, node)
                continue

        try:
            vm = vm_from_json(request, task_id, json, dc, template=True, save=True, update_ips=True,
                              update_dns=True)
        except Exception as e:
            logger.exception(e)
            logger.error('Could not load VM from json:\n"""%s"""', json)
            err_msg = 'Could not load server %s. Error: %s' % (vm_uuid, e)
            task_log_cb_error({'message': err_msg}, task_id, obj=node, **result['meta'])
            vms_err.append(vm_uuid)
        else:
            logger.info('Successfully saved new VM %s after harvest_vm(%s)', vm, node)
            vms.append(vm.hostname)
            vm_deployed.send(task_id, vm=vm)  # Signal! (will update monitoring)

            if vm.json_changed():
                try:
                    _vm_update(vm)
                except Exception as e:
                    logger.exception(e)

    if vms or not vms_err:
        if vms:
            result['message'] = 'Successfully harvested %s server(s) (%s)' % (len(vms), ','.join(vms))
        else:
            result['message'] = 'No new server found'

        task_log_cb_success(result, task_id, obj=node, **result['meta'])
        return result
    else:
        raise TaskException(result, 'Could not find or load any server')