Example #1
0
def get_public_ip(infrastructure_id):
    """
    Get the public IP of infrastructure

    Asks the Infrastructure Manager for the data associated with
    ``infrastructure_id`` and extracts the address assigned to
    ``net_interface.0.ip`` from it.

    Returns the IP address as a string, or None if the IM auth could not be
    read, no usable data was returned, or no address was found in the data.
    """
    public_ip = None

    # Get full list of cloud info
    clouds_info_list = utilities.create_clouds_list(
        CONFIG.get('clouds', 'path'))

    # Setup Infrastructure Manager client
    # NOTE(review): `cloud` is neither a parameter nor a local of this function,
    # so it has to exist at module level for this call to work - confirm
    im_auth = utilities.create_im_auth(cloud, None, clouds_info_list)
    client = imclient.IMClient(url=CONFIG.get('im', 'url'), data=im_auth)
    (status, msg) = client.getauth()
    if status != 0:
        logger.critical('Error reading IM auth file: %s', msg)
        return None

    # Get data associated with the Infra ID
    (state, msg) = client.getdata(infrastructure_id, 60)
    try:
        data = state['data']
    except (KeyError, TypeError):
        # Nothing usable came back (e.g. the request failed), so there is no
        # data to search for an IP address
        logger.critical('Unable to get data for infrastructure %s: %s',
                        infrastructure_id, msg)
        return None

    # Find the public IP
    m = re.search(r'net_interface.0.ip = \'([\.\d]+)\'', data)
    if m:
        public_ip = m.group(1)

    return public_ip
Example #2
0
def get_static_ansible_node(cloud):
    """
    Look up the static Ansible node configured for the given cloud, if any

    Returns a (public_ip, username) tuple taken from the 'ansible' section of
    the first cloud entry named ``cloud`` which has such a section, otherwise
    (None, None).
    """
    clouds_path = CONFIG.get('clouds', 'path')

    for entry in utilities.create_clouds_list(clouds_path):
        # Only entries for the requested cloud with an Ansible section count
        if entry['name'] != cloud or 'ansible' not in entry:
            continue

        node = entry['ansible']
        return (node['public_ip'], node['username'])

    return (None, None)
Example #3
0
def delete_infrastructure(infra_id):
    """
    Delete the specified infrastructure

    Behaviour depends on the request's query arguments:

    * no ``type`` argument: the infrastructure's status in the database is set
      to 'deletion-requested', unless it is already 'deleted' or
      'deletion-requested', in which case nothing is changed.
    * ``type=im``: the infrastructure is destroyed directly through the
      Infrastructure Manager on the cloud named by the ``cloud`` argument.

    Returns a (JSON body, HTTP status) tuple: 200 on success, 400 otherwise.
    """
    logger = custom_logger.CustomAdapter(logging.getLogger(__name__),
                                         {'id': infra_id})

    if 'type' not in request.args:
        db = database.get_db()
        if db.connect():
            # The finally clause releases the connection on every path,
            # including a failed status update
            try:
                # Get current status of infrastructure
                (_, status, _, _, _) = db.deployment_get_im_infra_id(infra_id)

                # If it has already been deleted, don't do anything but return success
                if status == 'deleted':
                    logger.info('Infrastructure has already been deleted')
                    return jsonify({}), 200
                elif status == 'deletion-requested':
                    logger.info(
                        'Infrastructure deletion has already been requested')
                    return jsonify({}), 200

                success = db.deployment_update_status_with_retries(
                    infra_id, 'deletion-requested')
                if success:
                    logger.info(
                        'Infrastructure deletion request successfully initiated')
                    return jsonify({}), 200
            finally:
                db.close()
        logger.critical(
            'Infrastructure deletion request failed, possibly a database issue'
        )
        return jsonify({}), 400
    elif request.args.get('type') == 'im':
        cloud = request.args.get('cloud')
        db = database.get_db()
        if db.connect():
            # The database is only needed to obtain the token
            try:
                clouds_info_list = utilities.create_clouds_list(
                    CONFIG.get('clouds', 'path'))
                token = tokens.get_token(cloud, None, db, clouds_info_list)
            finally:
                db.close()
            im_auth = utilities.create_im_auth(cloud, token, clouds_info_list)
            client = imclient.IMClient(url=CONFIG.get('im', 'url'),
                                       data=im_auth)
            (status, msg) = client.getauth()
            if status != 0:
                logger.critical('Error reading IM auth file: %s', msg)
                return jsonify({}), 400
            # NOTE(review): the result of destroy() is ignored, so 200 is
            # returned whether or not the deletion succeeded
            client.destroy(infra_id, 30)
            return jsonify({}), 200
    return jsonify({}), 400
Example #4
0
def delete_ansible_node(cloud, identity, db):
    """
    Remove the Ansible node belonging to the given cloud

    The node's infrastructure is destroyed through the Infrastructure Manager
    and its record is then removed from the database.

    Returns False if the node's infrastructure id is unknown or the IM auth
    cannot be read. Otherwise returns True; a failed destroy is logged but the
    database record is removed regardless.
    """
    # Look up the node; only its infrastructure id is used here
    (node_infra_id, _public_ip, _username,
     _timestamp) = db.get_ansible_node(cloud)

    if not node_infra_id:
        logger.critical(
            '[delete_ansible_node] Unable to get infrastructure id for Ansible node in cloud %s',
            cloud)
        return False

    logger.info(
        '[delete_ansible_node] About to delete Ansible node from clouds %s with infrastructure id %s',
        cloud, node_infra_id)

    # Full list of cloud info, needed for both the token and the IM auth
    clouds_path = CONFIG.get('clouds', 'path')
    all_clouds = utilities.create_clouds_list(clouds_path)

    # Obtain a token if one is needed
    auth_token = tokens.get_token(cloud, identity, db, all_clouds)

    # Prepare the Infrastructure Manager client
    auth_data = utilities.create_im_auth(cloud, auth_token, all_clouds)
    im = imclient.IMClient(url=CONFIG.get('im', 'url'), data=auth_data)
    (auth_status, auth_msg) = im.getauth()
    if auth_status != 0:
        logger.critical('Error reading IM auth file: %s', auth_msg)
        return False

    # Destroy the infrastructure
    deletion_timeout = int(CONFIG.get('timeouts', 'deletion'))
    (destroy_code, destroy_msg) = im.destroy(node_infra_id, deletion_timeout)
    if destroy_code != 0:
        logger.critical(
            'Unable to destroy Ansible node infrastructure with id "%s" on cloud "%s" due to "%s"',
            node_infra_id, cloud, destroy_msg)

    # Remove the record from the DB
    db.delete_ansible_node(cloud)

    return True
Example #5
0
def get_infrastructures():
    """
    List infrastructures, selected by status or by type

    * ``status`` given without ``type``: returns what the database reports for
      that status, optionally restricted to the ``cloud`` argument.
    * ``type=im`` together with ``cloud``: returns the infrastructure ids known
      to the Infrastructure Manager for that cloud.

    Returns a (JSON body, HTTP status) tuple: 200 with the list on success,
    400 with an empty object in every other case (including when the
    Infrastructure Manager returns no ids).
    """
    args = request.args

    if 'status' in args and 'type' not in args:
        cloud = args.get('cloud') if 'cloud' in args else None
        db = database.get_db()
        if not db.connect():
            return jsonify({}), 400
        infra = db.deployment_get_infra_in_state_cloud(args.get('status'),
                                                       cloud)
        db.close()
        return jsonify(infra), 200

    # From here on only the "type=im with a cloud" form is valid
    if 'type' not in args or 'cloud' not in args:
        return jsonify({}), 400
    if args.get('type') != 'im':
        return jsonify({}), 400

    cloud = args.get('cloud')
    db = database.get_db()
    if not db.connect():
        return jsonify({}), 400

    # The database is only needed for obtaining the token
    all_clouds = utilities.create_clouds_list(CONFIG.get('clouds', 'path'))
    auth_token = tokens.get_token(cloud, None, db, all_clouds)
    db.close()

    auth_data = utilities.create_im_auth(cloud, auth_token, all_clouds)
    im = imclient.IMClient(url=CONFIG.get('im', 'url'), data=auth_data)
    (auth_status, auth_msg) = im.getauth()
    if auth_status != 0:
        logger.critical('Error reading IM auth file: %s', auth_msg)
        return jsonify({}), 400

    (_, uris) = im.list_infra_ids(10)
    if not uris:
        return jsonify({}), 400

    # The id is the final path component of each URI
    return jsonify([uri.split('/')[-1] for uri in uris]), 200
Example #6
0
def deploy_job(db, unique_id):
    """
    Find an appropriate resource to deploy infrastructure

    Reads the job description for ``unique_id`` from the database, asks Open
    Policy Agent which clouds match the requirements, ranks them, and tries
    each in order of decreasing weight until one deployment succeeds.

    Returns True if infrastructure was deployed. Returns None if no RADL was
    provided, if the initial cloud query failed, or if no cloud could ever
    satisfy the requirements. Returns False for all other failures, including
    a deletion request arriving during deployment.
    """
    # Get JSON description & identity from the DB
    (description, identity, identifier) = db.deployment_get_json(unique_id)
    logger.info('Deploying infrastructure %s with identifier %s', unique_id,
                identifier)

    # Get RADL
    radl_contents = utilities.get_radl(description)
    if not radl_contents:
        logger.critical('RADL must be provided')
        db.deployment_update_status_with_retries(unique_id, 'unable')
        return None

    # Get requirements & preferences
    (requirements, preferences) = utilities.get_reqs_and_prefs(description)

    # Count number of instances
    instances = utilities.get_num_instances(radl_contents)
    logger.info('Found %d instances to deploy', instances)
    requirements['resources']['instances'] = instances

    # Generate JSON to be given to Open Policy Agent; the "check" variant asks
    # OPA to ignore current usage
    userdata = {'requirements': requirements, 'preferences': preferences}
    userdata_check = {
        'requirements': requirements,
        'preferences': preferences,
        'ignore_usage': True
    }

    # Setup Open Policy Agent client
    opa_client = opaclient.OPAClient(url=CONFIG.get('opa', 'url'),
                                     timeout=int(CONFIG.get('opa', 'timeout')))

    # Update available resources & their static info if necessary
    logger.info('Updating static resources info')
    utilities.update_resources(opa_client, CONFIG.get('clouds', 'path'))

    # Get full list of cloud info
    clouds_info_list = utilities.create_clouds_list(
        CONFIG.get('clouds', 'path'))

    # Update cloud images & flavours if necessary
    logger.info('Updating cloud images and flavours if necessary')
    cloud_images_flavours.update_cloud_details(requirements, db, identity,
                                               opa_client, clouds_info_list)

    # Check if deployment could be possible, ignoring current quotas/usage
    logger.info('Checking if job requirements will match any clouds')
    try:
        clouds_check = opa_client.get_clouds(userdata_check)
    except Exception as err:
        logger.critical('Unable to get list of clouds due to %s:', err)
        return None

    if not clouds_check:
        logger.critical(
            'No clouds exist which meet the requested requirements')
        db.deployment_update_status_reason(unique_id, 'NoMatchingResources')
        return None

    # Update quotas if necessary
    logger.info('Updating cloud quotas if necessary')
    cloud_quotas.set_quotas(requirements, db, identity, opa_client,
                            clouds_info_list)

    # Check if clouds are functional
    logger.info('Checking if resources are functional')
    utilities.update_clouds_status(opa_client, db, identity, clouds_info_list)

    # Get list of clouds meeting the specified requirements
    try:
        clouds = opa_client.get_clouds(userdata)
    except Exception as err:
        logger.critical('Unable to get list of resources due to %s:', err)
        return False

    logger.info('Suitable resources = [%s]', ','.join(clouds))

    if not clouds:
        logger.critical(
            'No resources exist which meet the requested requirements')
        db.deployment_update_status_reason(unique_id,
                                           'NoMatchingResourcesAvailable')
        return False

    # Shuffle list of clouds
    shuffle(clouds)

    # Rank clouds as needed
    try:
        clouds_ranked = opa_client.get_ranked_clouds(userdata, clouds)
    except Exception as err:
        logger.critical('Unable to get list of ranked clouds due to: %s', err)
        return False

    clouds_ranked_list = []
    for item in sorted(clouds_ranked, key=lambda k: k['weight'], reverse=True):
        clouds_ranked_list.append(item['site'])
    logger.info('Ranked clouds = [%s]', ','.join(clouds_ranked_list))

    # Check if we still have any clouds meeting requirements & preferences
    if not clouds_ranked:
        logger.critical(
            'No suitables clouds after ranking - if we get to this point there must be a bug in the OPA policy'
        )
        db.deployment_update_status_reason(unique_id, 'DeploymentFailed')
        return False

    # Check if we should stop
    (im_infra_id_new, infra_status_new, cloud_new, _,
     _) = db.deployment_get_im_infra_id(unique_id)
    if infra_status_new in ('deletion-requested', 'deleted', 'deletion-failed',
                            'deleting'):
        logger.info(
            'Deletion requested of infrastructure, aborting deployment')
        return False

    # Try to create infrastructure, exiting on the first successful attempt
    time_begin = time.time()
    success = False

    for item in sorted(clouds_ranked, key=lambda k: k['weight'], reverse=True):
        infra_id = None
        cloud = item['site']

        # Look up the resource type ('cloud' or 'batch') for this site
        resource_type = None
        for cloud_info in clouds_info_list:
            if cloud_info['name'] == cloud:
                resource_type = cloud_info['type']

        if resource_type:
            logger.info('Resource %s is of type %s', cloud, resource_type)
        else:
            logger.info(
                'Skipping because no resource type could be determined for resource %s',
                cloud)
            continue

        if resource_type == 'cloud':
            try:
                image = opa_client.get_image(userdata, cloud)
            except Exception as err:
                logger.critical('Unable to get image due to: %s', err)
                return False

            try:
                flavour = opa_client.get_flavour(userdata, cloud)
            except Exception as err:
                logger.critical('Unable to get flavour due to: %s', err)
                return False

            # If no flavour meets the requirements we should skip the current cloud
            if not flavour:
                logger.info('Skipping because no flavour could be determined')
                continue

            # If no image meets the requirements we should skip the current cloud
            if not image:
                logger.info('Skipping because no image could be determined')
                continue

            logger.info(
                'Attempting to deploy on cloud %s with image %s and flavour %s',
                cloud, image, flavour)

            # Setup Ansible node if necessary
            if requirements['resources']['instances'] > 1:
                (ip_addr,
                 username) = ansible.setup_ansible_node(cloud, identity, db)
                if not ip_addr or not username:
                    logger.critical(
                        'Unable to find existing or create an Ansible node in cloud %s because ip=%s,username=%s',
                        cloud, ip_addr, username)
                    continue
                logger.info(
                    'Ansible node in cloud %s available, now will deploy infrastructure for the job',
                    cloud)
            else:
                logger.info('Ansible node not required')
                ip_addr = None
                username = None

            # Get the Ansible private key if necessary
            private_key = None
            if ip_addr and username:
                try:
                    with open(CONFIG.get('ansible', 'private_key')) as data:
                        private_key = data.read()
                except IOError:
                    logger.critical(
                        'Unable to open private key for Ansible node from file "%s"',
                        CONFIG.get('ansible', 'private_key'))
                    return False

            # Create complete RADL content
            try:
                radl = Template(str(radl_contents)).substitute(
                    instance=flavour,
                    image=image,
                    cloud=cloud,
                    ansible_ip=ip_addr,
                    ansible_username=username,
                    ansible_private_key=private_key)
            except Exception as ex:
                logger.critical('Error creating RADL from template due to %s',
                                ex)
                return False

        # Check if we should stop
        (im_infra_id_new, infra_status_new, cloud_new, _,
         _) = db.deployment_get_im_infra_id(unique_id)
        if infra_status_new in ('deletion-requested', 'deleted',
                                'deletion-failed', 'deleting'):
            logger.info(
                'Deletion requested of infrastructure, aborting deployment')
            return False

        # Deploy infrastructure
        if resource_type == 'cloud':
            infra_id = cloud_deploy.deploy(
                radl, cloud, time_begin, unique_id, identity, db,
                int(requirements['resources']['instances']))
        elif resource_type == 'batch':
            infra_id = batch_deploy.deploy(
                cloud, time_begin, unique_id, identity, db,
                int(requirements['resources']['instances']))

        if infra_id:
            success = True
            if unique_id:
                # Set cloud and IM infra id
                db.deployment_update_status_with_retries(
                    unique_id, None, cloud, infra_id, resource_type)

                # Final check if we should delete the infrastructure
                (im_infra_id_new, infra_status_new, cloud_new, _,
                 _) = db.deployment_get_im_infra_id(unique_id)
                if infra_status_new in ('deletion-requested', 'deleted',
                                        'deletion-failed', 'deleting'):
                    logger.info(
                        'Deletion requested of infrastructure, aborting deployment'
                    )
                    return False
                else:
                    # Set status
                    db.deployment_update_status_with_retries(
                        unique_id, 'configured')
            break

    # Every ranked cloud was tried (or skipped) without success
    if unique_id and not infra_id:
        db.deployment_update_status_reason(unique_id,
                                           'NoMatchingResourcesAvailable')
        return False
    return success
Example #7
0
def deploy(radl, cloud, time_begin, unique_id, identity, db, num_nodes=1):
    """
    Deploy infrastructure from a specified RADL file

    Creates the infrastructure on ``cloud`` through the Infrastructure Manager
    and polls until it reaches the configured state, retrying creation up to
    the configured number of retries. For multi-node deployments a basic RADL
    is deployed first and the full configuration is applied once all nodes
    exist; failed VMs are removed and re-created along the way.

    Arguments:
        radl: complete RADL content for the infrastructure
        cloud: name of the cloud to deploy on
        time_begin: time (as from time.time()) at which the overall deployment
            began, used for the 'total' timeout
        unique_id: id of the deployment in the database
        identity: identity passed on when obtaining a token
        db: database object used to read/update the deployment status
        num_nodes: number of nodes wanted

    Returns the IM infrastructure id on success, otherwise None (failure,
    timeout, or deletion requested while deploying).
    """
    # Get full list of cloud info
    clouds_info_list = utilities.create_clouds_list(
        CONFIG.get('clouds', 'path'))

    # Check & get auth token if necessary
    token = tokens.get_token(cloud, identity, db, clouds_info_list)

    # Setup Open Policy Agent client
    opa_client = opaclient.OPAClient(url=CONFIG.get('opa', 'url'),
                                     timeout=int(CONFIG.get('opa', 'timeout')))

    # Setup Infrastructure Manager client
    im_auth = utilities.create_im_auth(cloud, token, clouds_info_list)
    client = imclient.IMClient(url=CONFIG.get('im', 'url'), data=im_auth)
    (status, msg) = client.getauth()
    if status != 0:
        logger.critical('Error reading IM auth file: %s', msg)
        return None

    # Create RADL content for initial deployment: for multiple nodes we strip out all configure/contextualize
    # blocks and will add this back in once we have successfully deployed all required VMs
    if num_nodes > 1:
        radl_base = utilities.create_basic_radl(radl)
    else:
        radl_base = radl

    # Set availability zone in RADL if necessary
    cloud_info = opa_client.get_cloud(cloud)
    if 'availability_zones' in cloud_info:
        availability_zones = cloud_info['availability_zones']
        if availability_zones:
            random.shuffle(availability_zones)
            logger.info('Using availability zone %s', availability_zones[0])
            radl_base = utilities.set_availability_zone(
                radl_base, availability_zones[0])

    retries_per_cloud = int(CONFIG.get('deployment', 'retries'))
    retry = 0
    success = False
    time_begin_this_cloud = time.time()

    # Retry loop
    while retry < retries_per_cloud + 1 and not success:
        if retry > 0:
            time.sleep(int(CONFIG.get('polling', 'duration')))
        logger.info('Deployment attempt %d of %d', retry + 1,
                    retries_per_cloud + 1)
        retry += 1

        # Check if we should stop
        (im_infra_id_new, infra_status_new, cloud_new, _,
         _) = db.deployment_get_im_infra_id(unique_id)
        if infra_status_new in ('deletion-requested', 'deleted',
                                'deletion-failed', 'deleting'):
            logger.info(
                'Deletion requested of infrastructure, aborting deployment')
            return None

        # Create infrastructure
        (infrastructure_id,
         msg) = client.create(radl_base,
                              int(CONFIG.get('timeouts', 'creation')))

        if infrastructure_id:
            logger.info(
                'Created infrastructure on cloud %s with IM id %s and waiting for it to be configured',
                cloud, infrastructure_id)
            db.deployment_update_status_with_retries(unique_id, None, cloud,
                                                     infrastructure_id)

            time_created = time.time()
            count_unconfigured = 0
            state_previous = None

            # Bookkeeping for multi-node deployments: numbers of failed nodes
            # of each kind still to be re-created, and whether the basic
            # (first-step) deployment has finished
            fnodes_to_be_replaced = 0
            wnodes_to_be_replaced = 0
            initial_step_complete = False
            multi_node_deletions = 0

            # Wait for infrastructure to enter the configured state; a break
            # out of this loop goes back to the retry loop above
            while True:
                # Sleep
                time.sleep(int(CONFIG.get('polling', 'duration')))

                # Check if we should stop
                (im_infra_id_new, infra_status_new, cloud_new, _,
                 _) = db.deployment_get_im_infra_id(unique_id)
                if infra_status_new in ('deletion-requested', 'deleted',
                                        'deletion-failed', 'deleting'):
                    logger.info(
                        'Deletion requested of infrastructure so aborting deployment'
                    )
                    return None

                # Don't spend too long trying to create infrastructure, give up eventually
                if time.time() - time_begin > int(
                        CONFIG.get('timeouts', 'total')):
                    logger.info(
                        'Giving up, total time waiting is too long, so will destroy infrastructure with IM id %s',
                        infrastructure_id)
                    destroy.destroy(client, infrastructure_id)
                    return None

                # Get the current overall state & states of all VMs in the infrastructure
                (states,
                 msg) = client.getstates(infrastructure_id,
                                         int(CONFIG.get('timeouts', 'status')))

                # If state is not known, wait
                if not states:
                    logger.info(
                        'State is not known for infrastructure with id %s on cloud %s',
                        infrastructure_id, cloud)
                    continue

                # Overall state of infrastructure
                state = None
                have_nodes = -1
                if 'state' in states:
                    if 'state' in states['state']:
                        state = states['state']['state']
                    if 'vm_states' in states['state']:
                        have_nodes = len(states['state']['vm_states'])

                # If the state or number of nodes is unknown, wait
                if not state or have_nodes == -1:
                    logger.warning(
                        'Unable to determine state and/or number of VMs from IM'
                    )
                    continue

                # Log a change in state
                if state != state_previous:
                    logger.info('Infrastructure with IM id %s is in state %s',
                                infrastructure_id, state)
                    state_previous = state

                # Handle difference situation when state is configured
                if state == 'configured':
                    logger.info(
                        'State is configured, NumNodesWanted=%d, NumNodesHave=%d, InitialStepComplete=%d',
                        num_nodes, have_nodes, initial_step_complete)

                    # The final configured state
                    if num_nodes == 1 or (num_nodes > 1
                                          and initial_step_complete):
                        logger.info(
                            'Successfully configured infrastructure on cloud %s, took %d secs',
                            cloud,
                            time.time() - time_begin_this_cloud)
                        success = True
                        return infrastructure_id

                    # Configured state for initial step of multi-node infrastructure
                    if num_nodes > 1 and have_nodes == num_nodes and not initial_step_complete:
                        logger.info(
                            'Successfully configured basic infrastructure on cloud %s, will now apply final configuration',
                            cloud)

                        initial_step_complete = True

                        # Final RADL is the full RADL with any line starting
                        # with 'deploy' blanked out
                        radl_final = ''
                        for line in radl.split('\n'):
                            if line.startswith('deploy'):
                                line = ''
                            radl_final += '%s\n' % line
                        (exit_code, msg) = client.reconfigure_new(
                            infrastructure_id, radl_final,
                            int(CONFIG.get('timeouts', 'reconfigure')))

                    # Configured state but some nodes failed and were deleted
                    if num_nodes > 1 and have_nodes < num_nodes and not initial_step_complete:
                        logger.info(
                            'Infrastructure is now in the configured state but need to re-create failed VMs'
                        )

                        if fnodes_to_be_replaced > 0:
                            logger.info('Creating %d fnodes',
                                        fnodes_to_be_replaced)
                            # Basic RADL with only a 'deploy fnode' line, for
                            # the number of fnodes to re-create
                            radl_new = ''
                            for line in radl_base.split('\n'):
                                if line.startswith('deploy wnode'):
                                    line = ''
                                if line.startswith('deploy fnode'):
                                    line = 'deploy fnode %d\n' % fnodes_to_be_replaced
                                radl_new += '%s\n' % line
                            fnodes_to_be_replaced = 0
                            (exit_code, msg) = client.add_resource(
                                infrastructure_id, radl_new, 120)

                        if wnodes_to_be_replaced > 0:
                            logger.info('Creating %d wnodes',
                                        wnodes_to_be_replaced)
                            # Basic RADL with only a 'deploy wnode' line, for
                            # the number of wnodes to re-create
                            radl_new = ''
                            for line in radl_base.split('\n'):
                                if line.startswith('deploy fnode'):
                                    line = ''
                                if line.startswith('deploy wnode'):
                                    line = 'deploy wnode %d\n' % wnodes_to_be_replaced
                                radl_new += '%s\n' % line
                            wnodes_to_be_replaced = 0
                            (exit_code, msg) = client.add_resource(
                                infrastructure_id, radl_new, 120)

                # Destroy infrastructure which is taking too long to enter the configured state
                if time.time() - time_created > int(
                        CONFIG.get('timeouts', 'configured')):
                    logger.warning(
                        'Waiting too long for infrastructure to be configured, so destroying'
                    )
                    opa_client.set_status(cloud, 'configuration-too-long')
                    destroy.destroy(client, infrastructure_id)
                    break

                # Destroy infrastructure which is taking too long to enter the running state
                if time.time() - time_created > int(
                        CONFIG.get('timeouts', 'notrunning')
                ) and state != 'running' and state != 'unconfigured' and num_nodes == 1:
                    logger.warning(
                        'Waiting too long for infrastructure to enter the running state, so destroying'
                    )
                    opa_client.set_status(cloud, 'pending-too-long')
                    destroy.destroy(client, infrastructure_id)
                    break

                # FIXME: This factor of 3 is a hack
                if time.time() - time_created > 3 * int(
                        CONFIG.get('timeouts', 'notrunning')
                ) and state != 'running' and state != 'unconfigured' and num_nodes > 1:
                    logger.warning(
                        'Waiting too long for infrastructure to enter the running state, so destroying'
                    )
                    opa_client.set_status(cloud, 'pending-too-long')
                    destroy.destroy(client, infrastructure_id)
                    break

                # Destroy infrastructure for which deployment failed
                if state == 'failed':
                    if num_nodes > 1:
                        logger.info(
                            'Infrastructure creation failed for some VMs on cloud %s, so deleting these (run %d)',
                            cloud, multi_node_deletions)
                        multi_node_deletions += 1
                        failed_vms = 0
                        for vm_id in states['state']['vm_states']:
                            if states['state']['vm_states'][vm_id] == 'failed':
                                logger.info('Deleting VM with id %d',
                                            int(vm_id))
                                failed_vms += 1

                                # Determine what type of node (fnode or wnode)
                                (exit_code, vm_info) = client.get_vm_info(
                                    infrastructure_id, int(vm_id),
                                    int(CONFIG.get('timeouts', 'deletion')))
                                # FIXME - is found_vm really needed?
                                found_vm = False
                                for info in vm_info['radl']:
                                    if 'state' in info and 'id' in info:
                                        found_vm = True
                                        if 'fnode' in info['id']:
                                            fnodes_to_be_replaced += 1
                                        else:
                                            wnodes_to_be_replaced += 1

                                if not found_vm:
                                    logger.warning(
                                        'Unable to determine type of VM')

                                # Delete the VM
                                (exit_code,
                                 msg_remove) = client.remove_resource(
                                     infrastructure_id, int(vm_id),
                                     int(CONFIG.get('timeouts', 'deletion')))

                        logger.info(
                            'Deleted %d failed VMs from infrastructure',
                            failed_vms)

                        # Check if we have deleted all VMs: in this case IM will return 'unknown' as the status
                        # so it's best to just start again
                        if failed_vms == num_nodes:
                            logger.warning(
                                'All VMs failed and deleted, so destroying infrastructure'
                            )
                            opa_client.set_status(cloud, state)
                            destroy.destroy(client, infrastructure_id)
                            break

                        continue

                    else:
                        logger.warning(
                            'Infrastructure creation failed on cloud %s, so destroying',
                            cloud)
                        opa_client.set_status(cloud, state)
                        destroy.destroy(client, infrastructure_id)
                        break

                # Handle unconfigured infrastructure
                if state == 'unconfigured':
                    count_unconfigured += 1
                    file_unconf = '%s/contmsg-%s-%d.txt' % (CONFIG.get(
                        'logs', 'contmsg'), unique_id, time.time())
                    contmsg = client.getcontmsg(
                        infrastructure_id,
                        int(CONFIG.get('timeouts', 'deletion')))
                    if count_unconfigured < int(
                            CONFIG.get('deployment', 'reconfigures')) + 1:
                        logger.warning(
                            'Infrastructure on cloud %s is unconfigured, will try reconfiguring after writing contmsg to a file',
                            cloud)
                        # Writing the contmsg is best-effort: a failure is
                        # logged and the reconfigure goes ahead anyway
                        try:
                            with open(file_unconf, 'w') as unconf:
                                unconf.write(contmsg)
                        except Exception:
                            logger.warning('Unable to write contmsg to file')
                        client.reconfigure(
                            infrastructure_id,
                            int(CONFIG.get('timeouts', 'reconfigure')))
                    else:
                        logger.warning(
                            'Infrastructure has been unconfigured too many times, so destroying after writing contmsg to a file'
                        )
                        opa_client.set_status(cloud, state)
                        try:
                            with open(file_unconf, 'w') as unconf:
                                unconf.write(contmsg)
                        except Exception:
                            logger.warning('Unable to write contmsg to file')
                        destroy.destroy(client, infrastructure_id)
                        break
        else:
            logger.warning(
                'Deployment failure on cloud %s with id %s with msg="%s"',
                cloud, infrastructure_id, msg)
            if msg == 'timedout':
                logger.warning(
                    'Infrastructure creation failed due to a timeout')
                opa_client.set_status(cloud, 'creation-timeout')
            else:
                file_failed = '%s/failed-%s-%d.txt' % (CONFIG.get(
                    'logs', 'contmsg'), unique_id, time.time())
                opa_client.set_status(cloud, 'creation-failed')
                logger.warning(
                    'Infrastructure creation failed, writing stdout/err to file "%s"',
                    file_failed)
                try:
                    with open(file_failed, 'w') as failed:
                        failed.write(msg)
                except Exception:
                    logger.warning('Unable to write contmsg to file')

    # All attempts on this cloud were used up without success
    return None
Example #8
0
def delete(unique_id):
    """
    Delete the infrastructure with the specified id

    Looks up the IM infrastructure id, status and cloud recorded for
    ``unique_id``, destroys the infrastructure when it is a 'cloud' resource
    with a UUID-like IM id, and updates the deployment status in the database.

    Returns False if the IM auth data could not be read or if the destroy
    call reported failure; returns True in every other case (including when
    there was nothing to destroy). The database connection is closed on
    every exit path.
    """
    logger.info('Deleting infrastructure')

    db = database.get_db()
    db.connect()

    # try/finally guarantees the connection is released on all return paths
    # and if any of the calls below raises.
    try:
        (im_infra_id, infra_status, cloud, _,
         _) = db.deployment_get_im_infra_id(unique_id)
        logger.info('Obtained IM id %s and cloud %s and status %s',
                    im_infra_id, cloud, infra_status)

        # Get full list of cloud info
        clouds_info_list = utilities.create_clouds_list(
            CONFIG.get('clouds', 'path'))

        # Determine resource type (the last matching entry wins)
        resource_type = None
        for cloud_info in clouds_info_list:
            if cloud_info['name'] == cloud:
                resource_type = cloud_info['type']

        if im_infra_id and cloud:
            if resource_type == 'cloud':
                match_obj_name = re.match(
                    r'\b[0-9a-f]{8}\b-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-\b[0-9a-f]{12}\b',
                    im_infra_id)
                if match_obj_name:
                    logger.info('Deleting cloud infrastructure with IM id %s',
                                im_infra_id)

                    # Get the identity of the user who created the infrastructure
                    identity = db.deployment_get_identity(unique_id)

                    # Check & get auth token if necessary
                    token = tokens.get_token(cloud, identity, db,
                                             clouds_info_list)

                    # Setup Infrastructure Manager client
                    im_auth = utilities.create_im_auth(cloud, token,
                                                       clouds_info_list)
                    client = imclient.IMClient(url=CONFIG.get('im', 'url'),
                                               data=im_auth)
                    (status, msg) = client.getauth()
                    if status != 0:
                        logger.critical('Error reading IM auth file: %s', msg)
                        return False

                    destroyed = destroy(client, im_infra_id)

                    if destroyed:
                        db.deployment_update_status_with_retries(
                            unique_id, 'deleted')
                        logger.info(
                            'Destroyed infrastructure with IM infrastructure id %s',
                            im_infra_id)
                    else:
                        db.deployment_update_status_with_retries(
                            unique_id, 'deletion-failed')
                        logger.critical(
                            'Unable to destroy infrastructure with IM infrastructure id %s',
                            im_infra_id)
                        return False
                else:
                    # An id that is not UUID-like cannot be destroyed via IM,
                    # so the deployment is simply marked as deleted.
                    logger.critical(
                        'IM infrastructure id %s does not match regex',
                        im_infra_id)
                    db.deployment_update_status_with_retries(
                        unique_id, 'deleted')
            elif resource_type == 'batch':
                match_obj_name = re.match(r'[\d]+', im_infra_id)
                if match_obj_name:
                    logger.info(
                        'Deleting batch infrastructure with HTCondor job id %s',
                        im_infra_id)
                    # NOTE: the HTCondor removal below is disabled, so batch
                    # jobs are only logged here and the status is not updated.
                    #client = htcondorclient.HTCondorClient()
                    #client.destroy(int(im_infra_id))
        else:
            logger.info(
                'No need to destroy infrastructure because resource infrastructure id is %s, resource name is %s, resource type is %s',
                im_infra_id, cloud, resource_type)
            db.deployment_update_status_with_retries(unique_id, 'deleted')

        return True
    finally:
        db.close()