Example #1
def _delete_oldest(model, define, view_function, view_item, task_id, msg):
    """
    Helper for finding oldest snapshots/backups and running DELETE view_function().

    @type model: django.db.models.Model
    """
    vm = define.vm
    # TODO: check indexes
    # noinspection PyUnresolvedReferences
    total = model.objects.filter(vm=vm,
                                 disk_id=define.disk_id,
                                 define=define,
                                 status=model.OK).count()
    to_delete = total - define.retention

    if to_delete < 1:
        return None

    # List of snapshot or backup names to delete TODO: check indexes
    # noinspection PyUnresolvedReferences
    oldest = model.objects.filter(vm=vm, disk_id=define.disk_id, define=define, status=model.OK)\
        .values_list('name', flat=True).order_by('id')[:to_delete]
    view_name = view_function.__name__
    view_data = {'disk_id': define.array_disk_id, view_item: tuple(oldest)}
    request = get_dummy_request(vm.dc, method='DELETE', system_user=True)
    request.define_id = define.id  # Automatic task
    # Go!
    logger.info('Running DELETE %s(%s, %s), because %s>%s', view_name, vm,
                view_data, total, define.retention)
    res = call_api_view(request,
                        'DELETE',
                        view_function,
                        vm.hostname,
                        data=view_data)

    if res.status_code in (200, 201):
        logger.warn('DELETE %s(%s, %s) was successful: %s', view_name, vm,
                    view_data, res.data)
    else:
        logger.error('Running DELETE %s(%s, %s) failed: %s (%s): %s',
                     view_name, vm, view_data, res.status_code,
                     res.status_text, res.data)
        MonitoringBackend.vm_send_alert(
            vm, 'Automatic deletion of old %ss %s/disk-%s failed to start.' %
            (model.__name__.lower(), vm.hostname, define.array_disk_id))
        # Need to log this, because nobody else does (+ there is no PENDING task)
        detail = 'hostname=%s, %s=%s, disk_id=%s, Error: %s' % (
            vm.hostname, view_item, ','.join(oldest), define.array_disk_id,
            get_task_error_message(res.data))
        task_log_error(task_id,
                       msg,
                       vm=vm,
                       detail=detail,
                       update_user_tasks=False)

    return res
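
A hedged usage sketch of the helper above (not taken from this listing): the names Snapshot, snap_define, vm_snapshot_list, 'snapnames' and LOG_SNAPS_DELETE are illustrative assumptions for how the generic parameters could be filled in.

# Hypothetical call site: enforce a snapshot definition's retention by deleting the
# oldest automatic snapshots through the DELETE API view.
res = _delete_oldest(
    model=Snapshot,                   # assumed model with an OK status and a name field
    define=snap_define,               # assumed SnapshotDefine with vm, disk_id, retention
    view_function=vm_snapshot_list,   # assumed DELETE view accepting a list of names
    view_item='snapnames',            # key under which the names go into view_data
    task_id=task_id,
    msg=LOG_SNAPS_DELETE,             # assumed task log message constant
)

if res is None:
    pass  # retention not exceeded, nothing was deleted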
Example #2
def _vm_update(vm):
    logger.info('Running PUT vm_manage(%s)', vm)
    from api.vm.base.views import vm_manage
    request = get_dummy_request(vm.dc, method='PUT', system_user=True)
    res = call_api_view(request, 'PUT', vm_manage, vm.hostname)

    if res.status_code == 201:
        logger.info('PUT vm_manage(%s) was successful: %s', vm, res.data)
    else:
        logger.error('PUT vm_manage(%s) failed: %s (%s): %s', vm,
                     res.status_code, res.status_text, res.data)
Example #3
def vm_snapshot_beat(snap_define_id):
    """
    This is a periodic beat task. Run POST vm_snapshot according to snapshot definition.
    """
    from api.vm.snapshot.views import vm_snapshot

    snap_define = SnapshotDefine.objects.get(id=snap_define_id)
    snap_name = snap_define.generate_snapshot_name()
    vm = snap_define.vm
    disk_id = snap_define.array_disk_id
    request = get_dummy_request(vm.dc, method='POST', system_user=True)
    request.define_id = snap_define.id  # Automatic task
    # Go!
    res = call_api_view(request,
                        'POST',
                        vm_snapshot,
                        vm.hostname,
                        snap_name,
                        data={
                            'disk_id': disk_id,
                            'fsfreeze': snap_define.fsfreeze
                        })

    if res.status_code == 201:
        logger.info(
            'POST vm_snapshot(%s, %s, {disk_id=%s}) was successful: %s', vm,
            snap_name, disk_id, res.data)
    else:
        # Need to log this, because nobody else does (+ there is no PENDING task)
        detail = 'snapname=%s, disk_id=%s, type=%s. Error: %s' % (
            snap_name, disk_id, Snapshot.AUTO, get_task_error_message(
                res.data))
        task_log_error(task_id_from_task_id(vm_snapshot_beat.request.id,
                                            dc_id=vm.dc.id),
                       LOG_SNAP_CREATE,
                       vm=vm,
                       detail=detail,
                       update_user_tasks=False)

        if res.status_code == HTTP_423_LOCKED:
            logger.warning(
                'Running POST vm_snapshot(%s, %s, {disk_id=%s}) failed: %s (%s): %s',
                vm, snap_name, disk_id, res.status_code, res.status_text,
                res.data)
        else:
            logger.error(
                'Running POST vm_snapshot(%s, %s, {disk_id=%s}) failed: %s (%s): %s',
                vm, snap_name, disk_id, res.status_code, res.status_text,
                res.data)
            MonitoringBackend.vm_send_alert(
                vm, 'Automatic snapshot %s/disk-%s@%s failed to start.' %
                (vm.hostname, disk_id, snap_define.name))
Example #4
def vm_backup_beat(bkp_define_id):
    """
    This is a periodic beat task. Run POST vm_backup according to backup definition.
    """
    from api.vm.backup.views import vm_backup

    bkp_define = BackupDefine.objects.get(id=bkp_define_id)
    vm = bkp_define.vm
    disk_id = bkp_define.array_disk_id
    defname = bkp_define.name
    request = get_dummy_request(vm.dc, method='POST', system_user=True)
    request.define_id = bkp_define.id  # Automatic task
    # Go!
    res = call_api_view(request,
                        'POST',
                        vm_backup,
                        vm.hostname,
                        defname,
                        data={
                            'disk_id': disk_id,
                            'fsfreeze': bkp_define.fsfreeze
                        })

    if res.status_code == 201:
        logger.info('POST vm_backup(%s, %s, {disk_id=%s}) was successful: %s',
                    vm, defname, disk_id, res.data)
    else:
        # Need to log this, because nobody else does (+ there is no PENDING task)
        detail = 'hostname=%s, bkpname=%s, disk_id=%s, Error: %s' % (
            vm.hostname, bkp_define.generate_backup_name(), disk_id,
            get_task_error_message(res.data))
        task_log_error(task_id_from_task_id(vm_backup_beat.request.id,
                                            dc_id=vm.dc.id),
                       LOG_BKP_CREATE,
                       vm=vm,
                       detail=detail,
                       update_user_tasks=False)

        if res.status_code == HTTP_423_LOCKED:
            logger.warning(
                'Running POST vm_backup(%s, %s, {disk_id=%s}) failed: %s (%s): %s',
                vm, defname, disk_id, res.status_code, res.status_text,
                res.data)
        else:
            logger.error(
                'Running POST vm_backup(%s, %s, {disk_id=%s}) failed: %s (%s): %s',
                vm, defname, disk_id, res.status_code, res.status_text,
                res.data)
            Zabbix.vm_send_alert(
                vm, 'Automatic backup %s/disk-%s@%s failed to start.' %
                (vm.hostname, disk_id, defname))
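
Both beat tasks above are ordinary Celery tasks (they read vm_snapshot_beat.request.id / vm_backup_beat.request.id). Below is a hedged sketch of how one periodic entry could be wired into a static Celery beat schedule; the project itself presumably registers these entries dynamically per snapshot/backup definition, and the dotted task path is an assumption.

from celery.schedules import crontab

# Hypothetical static beat entry; the real project likely builds these dynamically
# from SnapshotDefine/BackupDefine rows.
CELERYBEAT_SCHEDULE = {
    'vm-snapshot-define-42': {
        'task': 'api.vm.snapshot.tasks.vm_snapshot_beat',  # assumed task path
        'schedule': crontab(minute=0, hour='*/6'),         # every 6 hours
        'args': (42,),                                      # snap_define_id
    },
}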
Example #5
def vm_update(vm):
    """
    Internal API used for updating a VM if changes were detected in its json.
    """
    logger.info(
        'Running PUT vm_manage(%s), because something (vnc port?) has changed',
        vm)
    from api.vm.base.views import vm_manage
    from api.utils.request import get_dummy_request
    from api.utils.views import call_api_view
    request = get_dummy_request(vm.dc, method='PUT', system_user=True)
    res = call_api_view(request, 'PUT', vm_manage, vm.hostname)

    if res.status_code == 201:
        logger.warn('PUT vm_manage(%s) was successful: %s', vm, res.data)
    else:
        logger.error('PUT vm_manage(%s) failed: %s (%s): %s', vm,
                     res.status_code, res.status_text, res.data)
Example #6
def _save_node_ip_address(task_id, node):
    """Helper function for saving IP address and creating DNS records of a new compute node"""
    assert node.address

    try:
        ip_address = node.create_ip_address()
    except IPAddress.DoesNotExist as exc:
        logger.warning(
            'Could not save node %s IP address "%s" into admin network (%s)',
            node, node.address, exc)
        return

    logger.info('Saving node %s IP address "%s" into admin network', node,
                node.ip_address)
    ip_address.save()

    admin_net = node.ip_address.subnet  # The network was updated by init_mgmt()
    # Reload Subnet object because it is cached inside node instance
    admin_net = admin_net.__class__.objects.get(pk=admin_net.pk)
    # We need a request object
    request = get_dummy_request(DefaultDc(), 'POST', system_user=True)
    record_cls = RecordView.Record

    if admin_net.dns_domain and admin_net.dns_domain == node.domain_name:
        logger.info('Creating forward A DNS record for node %s', node)
        # This will fail silently
        RecordView.add_or_update_record(request,
                                        record_cls.A,
                                        admin_net.dns_domain,
                                        node.hostname,
                                        node.address,
                                        task_id=task_id,
                                        related_obj=node)

    if admin_net.ptr_domain:
        logger.info('Creating reverse PTR DNS record for node %s', node)
        # This will fail silently
        RecordView.add_or_update_record(request,
                                        record_cls.PTR,
                                        admin_net.ptr_domain,
                                        record_cls.get_reverse(node.address),
                                        node.hostname,
                                        task_id=task_id,
                                        related_obj=node)
Example #7
    def sync(cls, node):
        """Run put() for all node storages on compute node"""
        request = get_dummy_request(DefaultDc(),
                                    method='PUT',
                                    system_user=True)
        data = {}
        result = {}

        for ns in node.nodestorage_set.all():
            view = cls(request, ns, data)
            result[ns] = tid, err = view.put(internal=True)

            if err:
                logger.error(
                    'Failed to create node_vm_snapshot_sync task for %s@%s. Error: %s',
                    ns.zpool, node.hostname, err)
            else:
                logger.info('Created node_vm_snapshot_sync task %s for %s@%s',
                            tid, ns.zpool, node.hostname)

        return result
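
Because sync() returns a mapping of node storage to a (task_id, error) tuple, callers can inspect the outcome per storage. A short sketch based only on the snippet above (the class name NodeVmSnapshotList is taken from Example #10):

results = NodeVmSnapshotList.sync(node)

for ns, (tid, err) in results.items():
    if err:
        logger.error('Snapshot sync for %s@%s did not start: %s',
                     ns.zpool, node.hostname, err)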
Example #8
def vm_replica_failover_cb(result, task_id, vm_uuid=None, slave_vm_uuid=None):
    """
    A callback function for api.vm.replica.views.vm_replica_failover.
    """
    slave_vm = SlaveVm.get_by_uuid(slave_vm_uuid,
                                   sr=(
                                       'vm',
                                       'master_vm',
                                       'vm__node',
                                       'vm__dc',
                                   ))
    vm = slave_vm.master_vm
    assert vm.uuid == vm_uuid
    action = result['meta']['apiview']['method']
    force = result['meta']['apiview']['force']
    result, jsons = _parse_vm_replica_result(result,
                                             vm,
                                             slave_vm,
                                             action,
                                             key_json_idx=-1,
                                             cb_name='vm_replica_failover')
    sync_status = _save_svc_state(slave_vm, jsons)

    if result['returncode'] != 0:
        if sync_status is not None:
            slave_vm.save(update_fields=('sync_status', ))

        vm.revert_notready()
        msg = result['detail']
        logger.error(
            'Found nonzero returncode in result from %s vm_replica_failover(%s, %s). Error: %s',
            action, vm_uuid, slave_vm_uuid, msg)
        errmsg = _update_task_result_failure(result, msg)
        raise TaskException(result, errmsg)

    # New master VM was born
    # Delete tasks for old master
    if force:
        tasks = list(vm.tasks.keys())
        try:
            tasks.remove(task_id)
        except ValueError:
            pass
        _delete_tasks(vm, tasks)

    # Create internal shutdown task of old master VM
    old_vm_status = result['meta']['apiview']['orig_status']
    _vm_shutdown(vm)

    # Save new master, degrade old master
    slave_vm.master_vm.revert_notready(save=False)
    new_vm = slave_vm.fail_over()

    # Re-check status of old master (current degraded slave) because it was shut down,
    # but the state wasn't saved (it was notready back then)
    vm_status_one(task_id, vm)

    # Continue with promotion of the new master and degradation of the old one
    SlaveVm.switch_vm_snapshots_node_storages(new_vm,
                                              nss=vm.get_node_storages())
    # Force update of zabbix
    vm_json_active_changed.send(task_id,
                                vm=new_vm,
                                old_json_active={},
                                force_update=True)  # Signal!

    if new_vm.node != vm.node:
        vm_node_changed.send(task_id, vm=new_vm, force_update=True)  # Signal!

    msg = 'Server replica was successfully promoted to master'
    _update_task_result_success(result, slave_vm, action, msg)
    task_log_cb_success(result, task_id, vm=new_vm, **result['meta'])
    request = get_dummy_request(vm.dc, method='PUT', system_user=True)

    # Mark pending backups as "lost" :(  TODO: implement vm_backup_sync
    new_vm.backup_set.filter(status=Backup.PENDING).update(status=Backup.LOST)

    # Sync snapshots on new master VM (mark missing snapshots as "lost")
    for disk_id, _ in enumerate(new_vm.json_active_get_disks(), start=1):
        call_api_view(request,
                      'PUT',
                      vm_snapshot_list,
                      new_vm.hostname,
                      data={'disk_id': disk_id},
                      log_response=True)

    if old_vm_status == Vm.RUNNING:
        # Start new master VM
        call_api_view(request,
                      'PUT',
                      vm_status,
                      new_vm.hostname,
                      action='start',
                      log_response=True)

    return result
Example #9
def vm_create_cb(result, task_id, vm_uuid=None):
    """
    A callback function for api.vm.base.views.vm_manage.
    """
    vm = Vm.objects.select_related('dc').get(uuid=vm_uuid)
    msg = result.get('message', '')

    if result['returncode'] == 0 and msg.find('Successfully created') >= 0:
        json = result.pop('json', None)

        try:  # save json from smartos
            json_active = vm.json.load(json)
            vm.json_active = json_active
            vm.json = json_active
            if result['meta']['apiview']['recreate']:
                Snapshot.objects.filter(vm=vm).delete()
                SnapshotDefine.objects.filter(vm=vm).delete()
                BackupDefine.objects.filter(vm=vm).delete()
                vm.save_metadata('installed', False, save=False)

        except Exception as e:
            logger.error(
                'Could not parse json output from POST vm_manage(%s). Error: %s',
                vm_uuid, e)
            _vm_error(task_id, vm)
            logger.exception(e)
            raise TaskException(result, 'Could not parse json output')

        else:
            # save all
            vm.save(update_node_resources=True, update_storage_resources=True)
            vm_update_ipaddress_usage(vm)
            # vm_json_active_changed.send(task_id, vm=vm)  # Signal! -> not needed because vm_deployed is called below
            vm_created.send(task_id, vm=vm)  # Signal!

            if msg.find('Successfully started'
                        ) < 0:  # VM was created, but could not be started
                logger.error(
                    'VM %s was created, but could not be started! Error: %s',
                    vm_uuid, msg)
                _vm_error(task_id, vm)
                raise TaskException(result, 'Initial start failed (%s)' % msg)

            sendmail(vm.owner,
                     'vm/base/vm_create_subject.txt',
                     'vm/base/vm_create_email.txt',
                     extra_context={'vm': vm},
                     user_i18n=True,
                     dc=vm.dc,
                     fail_silently=True)

    else:
        logger.error(
            'Found nonzero returncode in result from POST vm_manage(%s). Error: %s',
            vm_uuid, msg)
        # Revert status and inform user
        _vm_create_cb_failed(result, task_id, vm)

        if result['meta']['apiview']['recreate'] and msg.find(
                'Successfully deleted') >= 0:
            _vm_error(task_id, vm)  # Something went terribly wrong

        # and FAIL this task
        raise TaskException(
            result, 'Got bad return code (%s). Error: %s' %
            (result['returncode'], msg))

    # So far so good. Now wait for deploy_over in vm_status_event_cb
    logger.info('VM %s is waiting for deploy_over...', vm_uuid)
    timer = 0
    repeat = 0

    while not vm.has_deploy_finished():
        if timer > VMS_VM_DEPLOY_TOOLONG:  # 10 minutes is too long
            if repeat == VMS_VM_DEPLOY_TOOLONG_MAX_CYCLES:  # 20 minutes is really too long
                logger.error('VM %s deploy process has timed out!', vm_uuid)
                _vm_error(task_id, vm)
                result['message'] = 'VM %s deploy has timed out' % vm.hostname
                task_log_cb_error(result, task_id, vm=vm, **result['meta'])
                return result

            repeat += 1
            timer = 0
            logger.error(
                'VM %s takes too long to deploy. Sending force stop/start',
                vm_uuid)
            # noinspection PyUnusedLocal
            tid, err = vm_reset(vm)

        sleep(3.0)
        timer += 3

    logger.info('VM %s is completely deployed!', vm_uuid)
    internal_metadata = vm.json.get(
        'internal_metadata', {}).copy()  # save internal_metadata for email
    vm = Vm.objects.select_related('dc', 'template').get(pk=vm.pk)  # Reload vm
    vm_deployed.send(task_id, vm=vm)  # Signal!
    sendmail(vm.owner,
             'vm/base/vm_deploy_subject.txt',
             'vm/base/vm_deploy_email.txt',
             fail_silently=True,
             extra_context={
                 'vm': vm,
                 'internal_metadata': internal_metadata
             },
             user_i18n=True,
             dc=vm.dc)

    try:
        result['message'] = '\n'.join(result['message'].strip().split('\n')
                                      [:-1])  # Remove "started" stuff
    except Exception as e:
        logger.exception(e)

    task_log_cb_success(result, task_id, vm=vm, **result['meta'])

    try:
        if vm.template:  # Try to create snapshot/backup definitions defined by template
            vm_define_snapshot, vm_define_backup = vm.template.vm_define_snapshot, vm.template.vm_define_backup

            if vm_define_snapshot or vm_define_backup:
                user = User.objects.get(id=user_id_from_task_id(task_id))
                request = get_dummy_request(vm.dc, method='POST', user=user)
                SnapshotDefineView.create_from_template(request,
                                                        vm,
                                                        vm_define_snapshot,
                                                        log=logger)
                BackupDefineView.create_from_template(request,
                                                      vm,
                                                      vm_define_backup,
                                                      log=logger)
    except Exception as e:
        logger.exception(e)

    return result
Example #10
def node_sysinfo_cb(result, task_id, node_uuid=None):
    """
    A callback function for updating Node.json (sysinfo).

    node_uuid will be set only if called via API or GUI
    """
    # In case the callback is called by restarting the erigonesd:fast service on a compute node, the meta dict
    # lacks a lot of information; msg is required as part of exception logging inside the callback decorator,
    # so we set it explicitly here
    result['meta']['msg'] = LOG_NODE_UPDATE

    if result['returncode'] != 0:
        logger.error(
            'Found nonzero return code in result from esysinfo command on %s',
            node_uuid)
        raise TaskException(result,
                            'Got bad return code (%s)' % result['returncode'])

    stdout = result.pop('stdout', '')
    result.pop('stderr', None)
    node_new = False

    try:
        esysinfo = parse_esysinfo(stdout)
        img_sources = esysinfo.pop('img_sources')
        img_initial = esysinfo.pop('img_initial')
    except Exception as e:
        logger.error(
            'Could not parse output from esysinfo command on %s. Error: %s',
            node_uuid, e)
        logger.exception(e)
        raise TaskException(result, 'Could not parse esysinfo output')
    else:
        uuid = esysinfo['sysinfo']['UUID']

    try:
        node = Node.objects.get(uuid=uuid)
    except Node.DoesNotExist:
        # The head node must be in online state during the admin DC initialization and each compute node must be in
        # online state during ssh key exchange.
        node_new = True
        is_head = not Node.objects.exists()
        logger.warn('Creating NEW node from sysinfo output from %s', node_uuid)
        node = Node.create_from_sysinfo(uuid,
                                        esysinfo,
                                        status=Node.ONLINE,
                                        is_head=is_head)
        node_created.send(task_id, node=node)  # Signal!
        result[
            'message'] = 'Successfully created new compute node %s' % node.hostname
        task_log_success(task_id,
                         msg=LOG_NODE_CREATE,
                         obj=node,
                         task_result=result,
                         update_user_tasks=True)
        sshkey_changed = bool(node.sshkey)

        if node.is_head:
            logger.warn(
                'New node %s is the first node ever created - assuming head node status. '
                'Initializing mgmt system and creating admin DC', node)
            from api.system.init import init_mgmt
            try:
                init_mgmt(node, images=img_initial)
            except Exception as e:
                logger.exception(e)
                result[
                    'message'] = 'Error while initializing admin datacenter (%s)' % e
                task_log_error(task_id,
                               msg=LOG_NODE_CREATE,
                               obj=node,
                               task_result=result,
                               update_user_tasks=True)

        logger.info('Saving node %s IP address "%s" into admin network', node,
                    node.ip_address)
        try:  # We should proceed even if the IP address is not registered
            node.ip_address.save()
        except Exception as e:
            logger.exception(e)
        else:
            admin_net = node.ip_address.subnet  # The network was updated by init_mgmt()
            # Reload Subnet object because it is cached inside node instance
            admin_net = admin_net.__class__.objects.get(pk=admin_net.pk)
            # We need a request object
            request = get_dummy_request(DefaultDc(), 'POST', system_user=True)
            record_cls = RecordView.Record

            if admin_net.dns_domain and admin_net.dns_domain == node.domain_name:
                logger.info('Creating forward A DNS record for node %s', node)
                # This will fail silently
                RecordView.add_or_update_record(request,
                                                record_cls.A,
                                                admin_net.dns_domain,
                                                node.hostname,
                                                node.address,
                                                task_id=task_id,
                                                related_obj=node)

            if admin_net.ptr_domain:
                logger.info('Creating reverse PTR DNS record for node %s',
                            node)
                # This will fail silently
                RecordView.add_or_update_record(request,
                                                record_cls.PTR,
                                                admin_net.ptr_domain,
                                                record_cls.get_reverse(
                                                    node.address),
                                                node.hostname,
                                                task_id=task_id,
                                                related_obj=node)

    else:
        sshkey_changed = node.sshkey_changed(esysinfo)

        if node.sysinfo_changed(esysinfo) or sshkey_changed:
            logger.warn('Updating node %s json with sysinfo output from %s',
                        node, node_uuid)
            node.update_from_sysinfo(esysinfo)  # Will save public SSH key too
            node_json_changed.send(task_id, node=node)  # Signal!
            result[
                'message'] = 'Successfully updated compute node %s' % node.hostname
            task_log_success(task_id,
                             msg=LOG_NODE_UPDATE,
                             obj=node,
                             task_result=result,
                             update_user_tasks=True)
        else:
            result[
                'message'] = 'No changes detected on compute node %s' % node.hostname
            task_log_success(task_id,
                             msg=LOG_NODE_UPDATE,
                             obj=node,
                             task_result=result,
                             update_user_tasks=True)

    if sshkey_changed:
        logger.warn(
            'SSH key has changed on node %s - creating authorized_keys synchronization tasks',
            node)
        try:
            run_node_authorized_keys_sync()
        except Exception as e:
            logger.exception(e)

    try:
        run_node_img_sources_sync(node, node_img_sources=img_sources)
    except Exception as e:
        logger.exception(e)

    if node_new:
        node.del_initializing()
        # Used by esdc-ee to change node status to unlicensed
        node_status = getattr(settings, 'VMS_NODE_STATUS_DEFAULT', None)

        if node_status:
            node.save_status(
                node_status)  # Set node status (most probably to unlicensed)
    else:
        # Always run vm_status_all for an old compute node
        vm_status_all(task_id, node)

        # Sync snapshots and backup for every node storage
        try:
            NodeVmSnapshotList.sync(node)
        except Exception as e:
            logger.exception(e)

    return result
Example #11
def init_mgmt(head_node, images=None):
    """
    Initialize the system and create the "admin" datacenter.
    """
    from api.dc.views import dc_node, dc_settings, dc_domain
    from api.network.base.views import net_manage
    from api.dns.domain.views import dns_domain
    from api.node.vm.views import harvest_vm

    admin = settings.VMS_DC_ADMIN
    main = settings.VMS_DC_MAIN
    # Admin DC and default DC should already exist (initial_data)
    admin_dc = Dc.objects.get_by_name(admin)
    default_dc = Dc.objects.get_by_name(main)
    # We need some request with admin DC - important for all subsequent commands
    request = get_dummy_request(admin_dc, method='POST', system_user=True)
    # All api calls will use the POST method...
    api_post = partial(_api_cmd, request, 'POST')
    # ...except net_manage, dns_record and dc_settings
    api_put = partial(_api_cmd, request, 'PUT')

    # Initialize images
    if images and isinstance(images, list):
        logger.warn('Initializing %d images', len(images))
        _init_images(head_node, images, default_dc, admin_dc)
    else:
        logger.error('Could not parse initial images or empty initial images')

    # Create DNS zone for the domain set during head node installation
    try:
        admin_zone = head_node.domain_name

        if admin_zone:
            api_post(dns_domain,
                     admin_zone,
                     owner=settings.ADMIN_USERNAME,
                     dc_bound=False)
    except Exception as e:
        admin_zone = None
        logger.exception(e)

    # Setup miscellaneous stuff depending on admin network info
    try:
        mgmt_ifconfig = get_local_netinfo()
        mgmt_ip = mgmt_ifconfig['addr']

        try:
            mgmt_net = ipaddress.ip_network(u'%(network)s/%(netmask)s' %
                                            mgmt_ifconfig)
        except Exception as exc:
            logger.exception(exc)
        else:
            try:  # Create reverse dns domain
                ptr_zone = reverse_domain_from_network(mgmt_net)
                api_post(dns_domain,
                         ptr_zone,
                         owner=settings.ADMIN_USERNAME,
                         dc_bound=False)
                api_post(dc_domain, ptr_zone, dc=main)
                api_post(dc_domain, ptr_zone, dc=admin)
            except Exception as exc:
                logger.exception(exc)
            else:
                # Set PTR zone for admin network
                mgmt_ifconfig['ptr_domain'] = ptr_zone

        # Change admin network subnet according to ip/netmask/gw on this machine (mgmt01.local)
        api_put(net_manage,
                settings.VMS_NET_ADMIN,
                dns_domain=admin_zone,
                **mgmt_ifconfig)

        # Change SITE_LINK and SITE_SIGNATURE in both datacenters (#549, #551)
        site_link = 'https://%s' % mgmt_ip
        site_signature = settings.SITE_SIGNATURE.replace(
            settings.SITE_LINK, site_link)
        api_put(dc_settings,
                main,
                SITE_LINK=site_link,
                SITE_SIGNATURE=site_signature)
        api_put(dc_settings,
                admin,
                SITE_LINK=site_link,
                SITE_SIGNATURE=site_signature)
        _es_api_url(site_link)
    except Exception as e:
        logger.exception(e)

    # Add head node + all its storages into admin DC
    api_post(dc_node,
             head_node.hostname,
             strategy=DcNode.SHARED,
             add_storage=9,
             dc=admin)

    logger.warning('Admin datacenter "%s" was successfully initialized',
                   admin_dc)

    # Harvest all VMs from head node into admin DC
    while True:
        ret = api_post(harvest_vm, head_node.hostname, dc=admin)

        if status.is_success(ret.status_code):
            logger.info('POST harvest_vm(%s) has started: %s',
                        head_node.hostname, ret.data)
            break
        else:
            logger.error(
                'POST harvest_vm(%s) has failed; retrying in 3 seconds',
                head_node.hostname)
            sleep(3)

    # The harvest is performing some other tasks asynchronously during which the node must stay in online state.
    # So let's sleep for some time to give the tasks some breathing space.
    logger.info(
        'Sleeping for 60 seconds after admin datacenter initialization')
    sleep(60)

    # Let's update the default image server after we've harvested the VMS_VM_IMG01
    try:
        if Vm.objects.filter(uuid=settings.VMS_VM_IMG01).exists():
            vm_img01_uuid = settings.VMS_VM_IMG01
        else:
            vm_img01_uuid = None

        if settings.VMS_IMAGE_VM == vm_img01_uuid:
            logger.info(
                'The current image server (VMS_IMAGE_VM) is already set to %s',
                vm_img01_uuid)
        else:
            api_put(dc_settings, main, VMS_IMAGE_VM=vm_img01_uuid)
    except Exception as e:
        logger.exception(e)

    # We can change the default resolvers after we've harvested the VMS_VM_DNS01 (#chili-831)
    try:
        try:
            vm_dns01_ip = Vm.objects.get(uuid=settings.VMS_VM_DNS01).ips[0]
        except Vm.DoesNotExist:
            logger.warning('DNS VM (%s) not found - using default DNS servers',
                           settings.VMS_VM_DNS01)
        else:
            api_put(dc_settings, main, VMS_VM_RESOLVERS_DEFAULT=[vm_dns01_ip])
            api_put(dc_settings, admin, VMS_VM_RESOLVERS_DEFAULT=[vm_dns01_ip])
    except Exception as e:
        logger.exception(e)

    return ret
Example #12
def vm_migrate_cb(result, task_id, vm_uuid=None, slave_vm_uuid=None):
    """
    A callback function for api.vm.migrate.views.vm_migrate.
    """
    ghost_vm = SlaveVm.get_by_uuid(slave_vm_uuid)
    msg = result.get('message', '')

    if result['returncode'] == 0 and msg and 'Successfully migrated' in msg:
        # Save node and delete placeholder VM first
        node = ghost_vm.vm.node
        nss = set(ghost_vm.vm.get_node_storages())
        ghost_vm.delete(
        )  # post_delete signal will update node and storage resources
        # Fetch VM after ghost_vm is deleted, because it updates vm.slave_vms array
        vm = Vm.objects.select_related('node', 'dc').get(uuid=vm_uuid)
        changing_node = vm.node != ghost_vm.vm.node
        json = result.pop('json', None)

        try:  # save json from smartos
            json_active = vm.json.load(json)
            vm.json_active = json_active
            vm.json = json_active
        except Exception as e:
            logger.exception(e)
            logger.error(
                'Could not parse json output from vm_migrate(%s). Error: %s',
                vm_uuid, e)
            raise TaskException(result, 'Could not parse json output')

        nss.update(list(vm.get_node_storages()))
        # Revert status and set new node (should trigger node resource update)
        vm.revert_notready(save=False)
        if changing_node:
            vm.set_node(node)
        vm.save(update_node_resources=True, update_storage_resources=nss)
        SlaveVm.switch_vm_snapshots_node_storages(vm, nss=nss)
        vm_node_changed.send(task_id, vm=vm, force_update=True)  # Signal!

    else:
        vm = Vm.objects.get(uuid=vm_uuid)
        _vm_migrate_cb_failed(result, task_id, vm, ghost_vm)
        logger.error(
            'Found nonzero returncode in result from vm_migrate(%s). Error: %s',
            vm_uuid, msg)
        raise TaskException(
            result, 'Got bad return code (%s). Error: %s' %
            (result['returncode'], msg))

    task_log_cb_success(result, task_id, vm=vm, **result['meta'])

    if vm.json_changed():
        logger.info(
            'Running PUT vm_manage(%s), because something (vnc port?) has changed',
            vm)
        from api.vm.base.views import vm_manage
        from api.utils.request import get_dummy_request
        from api.utils.views import call_api_view
        request = get_dummy_request(vm.dc, method='PUT', system_user=True)
        res = call_api_view(request, 'PUT', vm_manage, vm.hostname)

        if res.status_code == 201:
            logger.warn('PUT vm_manage(%s) was successful: %s', vm, res.data)
        else:
            logger.error('PUT vm_manage(%s) failed: %s (%s): %s', vm,
                         res.status_code, res.status_text, res.data)

    return result
Example #13
def harvest_vm_cb(result, task_id, node_uuid=None):
    node = Node.objects.get(uuid=node_uuid)
    dc = Dc.objects.get_by_id(dc_id_from_task_id(task_id))
    err = result.pop('stderr', None)
    vms = []
    vms_err = []
    jsons = []

    # Pop the returncode first so it can still be reported below after being
    # removed from the result dict.
    returncode = result.pop('returncode', None)

    if returncode != 0 or err:
        logger.error(
            'Found nonzero returncode in result from harvest_vm(%s). Error: %s',
            node, err)
        raise TaskException(
            result, 'Got bad return code (%s). Error: %s' % (returncode, err))

    for json in result.pop('stdout', '').split('||||'):
        json = json.strip()
        if json:
            try:
                jsons.append(PickleDict.load(json))
            except Exception as e:
                logger.error(
                    'Could not parse json output from harvest_vm(%s). Error: %s',
                    node, e)
                raise TaskException(result, 'Could not parse json output')

    if not jsons:
        raise TaskException(result, 'Missing json output')

    request = get_dummy_request(dc, method='POST', system_user=True)

    for json in jsons:
        vm_uuid = json.get(
            'uuid', None)  # Bad uuid will be stopped later in vm_from_json()
        if vm_uuid:
            if Vm.objects.filter(uuid=vm_uuid).exists():
                logger.warning('Ignoring VM %s found in harvest_vm(%s)',
                               vm_uuid, node)
                continue
        try:
            vm = vm_from_json(request,
                              task_id,
                              json,
                              dc,
                              template=True,
                              save=True,
                              update_ips=True,
                              update_dns=True)
        except Exception as e:
            logger.exception(e)
            logger.error('Could not load VM from json:\n"""%s"""', json)
            err_msg = 'Could not load server %s. Error: %s' % (vm_uuid, e)
            task_log_cb_error({'message': err_msg},
                              task_id,
                              obj=node,
                              **result['meta'])
            vms_err.append(vm_uuid)
        else:
            logger.info('Successfully saved new VM %s after harvest_vm(%s)',
                        vm, node)
            vms.append(vm.hostname)
            vm_deployed.send(task_id,
                             vm=vm)  # Signal!  (will update monitoring)

            if vm.json_changed():
                try:
                    _vm_update(vm)
                except Exception as e:
                    logger.exception(e)

    if vms or not vms_err:
        if vms:
            result['message'] = 'Successfully harvested %s server(s) (%s)' % (
                len(vms), ','.join(vms))
        else:
            result['message'] = 'No new server found'

        task_log_cb_success(result, task_id, obj=node, **result['meta'])
        return result
    else:
        raise TaskException(result, 'Could not find or load any server')