Exemple #1
0
def node_sysinfo_cb(result, task_id, node_uuid=None):
    """
    A callback function for updating Node.json (sysinfo).

    The esysinfo output is taken from ``result['stdout']`` and parsed. If no Node with the reported UUID exists,
    a new one is created (the very first node is created as head node and triggers ``init_mgmt``), its IP address
    is saved and DNS records are added when the admin network defines the matching domains. Otherwise the existing
    node is updated when its sysinfo or SSH key has changed. Afterwards the authorized_keys sync (only if the SSH
    key changed) and the image sources sync are started; for an already existing node ``vm_status_all`` and the
    snapshot list sync are run as well.

    node_uuid will be set only if called via API or GUI

    :param result: Task result dict. ``returncode`` and ``meta`` are read, ``stdout`` and ``stderr`` are removed
        and ``message`` (plus ``meta['msg']``) is set.
    :param task_id: ID of the task; passed to signals, task log helpers and ``vm_status_all``.
    :param node_uuid: Optional node identifier; used only in log messages inside this function.
    :return: The modified ``result`` dict.
    :raises TaskException: When the return code is non-zero or the esysinfo output cannot be parsed.
    """
    # in case the callback is called by restarting erigonesd:fast service on compute node, the meta dict lacks
    # a lot of information; msg is required as part of exception logging inside callback decorator
    # therefore we set it explicitly
    result['meta']['msg'] = LOG_NODE_UPDATE

    if result['returncode'] != 0:
        logger.error('Found nonzero return code in result from esysinfo command on %s', node_uuid)
        raise TaskException(result, 'Got bad return code (%s)' % result['returncode'])

    # The raw command output is not kept in the task result
    stdout = result.pop('stdout', '')
    result.pop('stderr', None)
    node_new = False

    try:
        esysinfo = parse_esysinfo(stdout)
        img_sources = esysinfo.pop('img_sources')
        img_initial = esysinfo.pop('img_initial')
    except Exception as e:
        logger.error('Could not parse output from esysinfo command on %s. Error: %s', node_uuid, e)
        logger.exception(e)
        raise TaskException(result, 'Could not parse esysinfo output')
    else:
        uuid = esysinfo['sysinfo']['UUID']

    try:
        node = Node.objects.get(uuid=uuid)
    except Node.DoesNotExist:
        # The head node must be in online state during the admin DC initialization and each compute node must be in
        # online state during ssh key exchange.
        node_new = True
        is_head = not Node.objects.exists()  # No node in DB yet => this one becomes the head node
        logger.warning('Creating NEW node from sysinfo output from %s', node_uuid)
        node = Node.create_from_sysinfo(uuid, esysinfo, status=Node.ONLINE, is_head=is_head)
        node_created.send(task_id, node=node)  # Signal!
        result['message'] = 'Successfully created new compute node %s' % node.hostname
        task_log_success(task_id, msg=LOG_NODE_CREATE, obj=node, task_result=result, update_user_tasks=True)
        # A new node with an SSH key always requires the authorized_keys synchronization below
        sshkey_changed = bool(node.sshkey)

        if node.is_head:
            logger.warning('New node %s is the first node ever created - assuming head node status. '
                           'Initializing mgmt system and creating admin DC', node)
            from api.system.init import init_mgmt
            try:
                init_mgmt(node, images=img_initial)
            except Exception as e:
                # The failure is logged (also into the task log), but the callback continues
                logger.exception(e)
                result['message'] = 'Error while initializing admin datacenter (%s)' % e
                task_log_error(task_id, msg=LOG_NODE_CREATE, obj=node, task_result=result, update_user_tasks=True)

        logger.info('Saving node %s IP address "%s" into admin network', node, node.ip_address)
        try:  # We should proceed even if the IP address is not registered
            node.ip_address.save()
        except Exception as e:
            logger.exception(e)
        else:
            admin_net = node.ip_address.subnet  # The network was updated by init_mgmt()
            # Reload Subnet object because it is cached inside node instance
            admin_net = admin_net.__class__.objects.get(pk=admin_net.pk)
            # We need a request object
            request = get_dummy_request(DefaultDc(), 'POST', system_user=True)
            record_cls = RecordView.Record

            if admin_net.dns_domain and admin_net.dns_domain == node.domain_name:
                logger.info('Creating forward A DNS record for node %s', node)
                # This will fail silently
                RecordView.add_or_update_record(request, record_cls.A, admin_net.dns_domain, node.hostname,
                                                node.address, task_id=task_id, related_obj=node)

            if admin_net.ptr_domain:
                logger.info('Creating reverse PTR DNS record for node %s', node)
                # This will fail silently
                RecordView.add_or_update_record(request, record_cls.PTR, admin_net.ptr_domain,
                                                record_cls.get_reverse(node.address), node.hostname,
                                                task_id=task_id, related_obj=node)

    else:
        sshkey_changed = node.sshkey_changed(esysinfo)

        if node.sysinfo_changed(esysinfo) or sshkey_changed:
            logger.warning('Updating node %s json with sysinfo output from %s', node, node_uuid)
            node.update_from_sysinfo(esysinfo)  # Will save public SSH key too
            node_json_changed.send(task_id, node=node)  # Signal!
            result['message'] = 'Successfully updated compute node %s' % node.hostname
        else:
            result['message'] = 'No changes detected on compute node %s' % node.hostname

        # The task log entry is the same for both outcomes; only the result message differs
        task_log_success(task_id, msg=LOG_NODE_UPDATE, obj=node, task_result=result, update_user_tasks=True)

    if sshkey_changed:
        logger.warning('SSH key has changed on node %s - creating authorized_keys synchronization tasks', node)
        try:
            run_node_authorized_keys_sync()
        except Exception as e:
            logger.exception(e)

    try:
        run_node_img_sources_sync(node, node_img_sources=img_sources)
    except Exception as e:
        logger.exception(e)

    if node_new:
        node.del_initializing()
        # Used by esdc-ee to change node status to unlicensed
        node_status = getattr(settings, 'VMS_NODE_STATUS_DEFAULT', None)

        if node_status:
            node.save_status(node_status)  # Set node status (most probably to unlicensed)
    else:
        # Always run vm_status_all for an old compute node
        vm_status_all(task_id, node)

        # Sync snapshots and backup for every node storage
        try:
            NodeVmSnapshotList.sync(node)
        except Exception as e:
            logger.exception(e)

    return result
Exemple #2
0
def node_sysinfo_cb(result, task_id, node_uuid=None):
    """
    A callback function for updating Node.json (sysinfo).

    The esysinfo output is taken from ``result['stdout']`` and parsed. If no Node with the reported UUID exists,
    a new one is created (the very first node is created as head node and triggers ``init_mgmt``) and its IP
    address is saved via ``_save_node_ip_address``. Otherwise the existing node is updated when its sysinfo or
    SSH key has changed, or the ``node_json_unchanged`` signal is sent. Afterwards the authorized_keys sync (only
    if the SSH key changed) and the image sources sync are started; for an already existing node
    ``vm_status_all`` and the snapshot list sync are run as well. Finally the node's system version is re-read
    and a ``NodeSystemRestarted`` event is sent when the task is owned by ``TASK_USER``.

    node_uuid will be set only if called via API or GUI

    :param result: Task result dict. ``returncode`` and ``meta`` are read, ``stdout`` and ``stderr`` are removed
        and ``message`` (plus ``meta['msg']``) is set.
    :param task_id: ID of the task; passed to signals, task log helpers and ``vm_status_all``.
    :param node_uuid: Optional node identifier; used only in log messages inside this function.
    :return: The modified ``result`` dict.
    :raises TaskException: When the return code is non-zero or the esysinfo output cannot be parsed.
    """
    # in case the callback is called by restarting erigonesd:fast service on compute node, the meta dict lacks
    # a lot of information; msg is required as part of exception logging inside callback decorator
    # therefore we set it explicitly
    result['meta']['msg'] = LOG_NODE_UPDATE

    if result['returncode'] != 0:
        logger.error('Found nonzero return code in result from esysinfo command on %s', node_uuid)
        raise TaskException(result, 'Got bad return code (%s)' % result['returncode'])

    # The raw command output is not kept in the task result
    stdout = result.pop('stdout', '')
    result.pop('stderr', None)
    node_new = False

    try:
        esysinfo = parse_esysinfo(stdout)
        img_sources = esysinfo.pop('img_sources')
        img_initial = esysinfo.pop('img_initial')
    except Exception as e:
        logger.error('Could not parse output from esysinfo command on %s. Error: %s', node_uuid, e)
        logger.exception(e)
        raise TaskException(result, 'Could not parse esysinfo output')
    else:
        uuid = esysinfo['sysinfo']['UUID']

    try:
        node = Node.objects.get(uuid=uuid)
    except Node.DoesNotExist:
        # The head node must be in online state during the admin DC initialization and each compute node must be in
        # online state during ssh key exchange.
        node_new = True
        is_head = not Node.objects.exists()  # No node in DB yet => this one becomes the head node
        logger.warning('Creating NEW node from sysinfo output from %s', node_uuid)
        node = Node.create_from_sysinfo(uuid, esysinfo, status=Node.ONLINE, is_head=is_head)
        node_created.send(task_id, node=node)  # Signal!
        result['message'] = 'Successfully created new compute node %s' % node.hostname
        task_log_success(task_id, msg=LOG_NODE_CREATE, obj=node, task_result=result, update_user_tasks=True)
        # A new node with an SSH key always requires the authorized_keys synchronization below
        sshkey_changed = bool(node.sshkey)

        if node.is_head:
            logger.warning('New node %s is the first node ever created - assuming head node status. '
                           'Initializing mgmt system and creating admin DC', node)
            from api.system.init import init_mgmt
            try:
                init_mgmt(node, images=img_initial)
            except Exception as e:
                # The failure is logged (also into the task log), but the callback continues
                logger.exception(e)
                result['message'] = 'Error while initializing admin datacenter (%s)' % e
                task_log_error(task_id, msg=LOG_NODE_CREATE, obj=node, task_result=result, update_user_tasks=True)

        try:  # A failure here is only logged; the callback continues
            _save_node_ip_address(task_id, node)
        except Exception as e:
            logger.exception(e)
    else:
        sshkey_changed = node.sshkey_changed(esysinfo)
        sysinfo_changed = node.sysinfo_changed(esysinfo)

        if sysinfo_changed or sshkey_changed:
            logger.warning('Updating node %s json with sysinfo output from %s', node, node_uuid)
            node.update_from_sysinfo(esysinfo)  # Will save public SSH key too
            node_json_changed.send(task_id, node=node)  # Signal!
            result['message'] = 'Successfully updated compute node %s' % node.hostname
        else:
            node_json_unchanged.send(task_id, node=node)  # Signal!
            result['message'] = 'No changes detected on compute node %s' % node.hostname

        # The task log entry is the same for both outcomes; only the result message differs
        task_log_success(task_id, msg=LOG_NODE_UPDATE, obj=node, task_result=result, update_user_tasks=True)

    if sshkey_changed:
        logger.warning('SSH key has changed on node %s - creating authorized_keys synchronization tasks', node)
        try:
            run_node_authorized_keys_sync()
        except Exception as e:
            logger.exception(e)

    try:
        run_node_img_sources_sync(node, node_img_sources=img_sources)
    except Exception as e:
        logger.exception(e)

    if node_new:
        node.del_initializing()
        # Used by esdc-ee to change node status to unlicensed
        node_status = getattr(settings, 'VMS_NODE_STATUS_DEFAULT', None)

        if node_status:
            node.save_status(node_status)  # Set node status (most probably to unlicensed)
    else:
        # Always run vm_status_all for an old compute node
        vm_status_all(task_id, node)

        # Sync snapshots and backup for every node storage
        try:
            NodeVmSnapshotList.sync(node)
        except Exception as e:
            logger.exception(e)

    # Refresh cached version information + emit event informing about restarted erigonesd:fast
    try:
        del node.system_version

        # Sometimes the node worker does not respond within the given timeout so we have to try more than once
        # NOTE(review): this relies on node.system_version being fetched again on access while it is still
        # empty - confirm against the Node.system_version implementation
        for _attempt in range(5):
            if node.system_version:
                break

        logger.info('Node %s has system version %s', node, node.system_version)

        if owner_id_from_task_id(task_id) == TASK_USER:  # internal user ID
            NodeSystemRestarted(node, system_version=node.system_version).send()

    except Exception as e:
        logger.exception(e)

    return result