Example #1
    def create_n_watch(context, wf_def, wf_data={}):
        """
        Create a running workflow, and watch it until completion

        Args:
            context (ArgoContext): context to execute the workflow in
            wf_def (dict): workflow definition
            wf_data (dict, optional): data to be passed to workflow. Defaults to {}.

        Returns:
            (ArgoWorkflow, ArgoWorkflowStatus): workflow and status of the workflow
        """
        wf = ArgoWorkflow.create(context, wf_def, wf_data=wf_data)

        try:
            # short initial watch; if the workflow is already complete,
            # skip the long follow-up watch below
            wf.watch(context, 10, 18)
            if wf.last_status.complete:
                return (wf, wf.last_status)
            wf.watch(context, 60, 1440)
        except Exception as exc:
            logger.debug(
                "ARGO, ArgoWorkflow.create_n_watch(), while watching {}".
                format(type(exc)))
            logger.debug(
                "ARGO, ArgoWorkflow.create_n_watch(), while watching {}".
                format(exc))
            raise
        return (wf, wf.last_status)
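A minimal usage sketch for create_n_watch, assuming an already-configured ArgoContext and a workflow definition loaded from YAML; the file name, context construction, and wf_data values are illustrative, not from the source:

import yaml

with open("workflow.yaml") as f:    # hypothetical workflow definition file
    wf_def = yaml.safe_load(f)

context = ArgoContext()             # assumed constructor; real configuration not shown here
wf, status = ArgoWorkflow.create_n_watch(context, wf_def, wf_data={"arg": "value"})
print("workflow {} complete: {}".format(wf.wf_name, status.complete))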
Example #2
def mount_failed(
    context,
    exception_msg,
    traceback,
    driverCls,
    provider,
    identity,
    volume_id,
    unmount=False,
    **celery_task_args
):
    from service import volume as volume_service
    try:
        celery_logger.debug("mount_failed task started at %s." % timezone.now())
        celery_logger.info("task context=%s" % context)
        err_str = "%s\nMount Error Traceback:%s" % (exception_msg, traceback)
        celery_logger.error(err_str)
        driver = get_driver(driverCls, provider, identity)
        volume = driver.get_volume(volume_id)
        if unmount:
            tmp_status = 'umount_error'
        else:
            tmp_status = 'mount_error'
        return volume_service._update_volume_metadata(
            driver, volume, metadata={'tmp_status': tmp_status}
        )
    except Exception as exc:
        celery_logger.warn(exc)
        mount_failed.retry(exc=exc)
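The try/except-and-retry shape above recurs in nearly every task on this page. A minimal, self-contained sketch of the pattern under a standard Celery setup; the app, task, and do_work names are illustrative, not from the source:

from celery import Celery

app = Celery("tasks")    # illustrative app; broker configuration omitted

def do_work(arg):        # stand-in for the real task body
    return arg

@app.task(bind=True, max_retries=3, default_retry_delay=60)
def example_task(self, arg):
    try:
        return do_work(arg)
    except Exception as exc:
        # re-queue the task; retry() raises, so the task exits here on failure
        raise self.retry(exc=exc)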
Example #3
def prep_instance_for_snapshot(identity_id, instance_id, **celery_task_args):
    identity = Identity.objects.get(id=identity_id)
    try:
        celery_logger.debug("prep_instance_for_snapshot task started at %s." % timezone.now())
        # NOTE: FIXME if the assumption that the 'linux username'
        # is the 'created_by' AtmosphereUser changes.
        username = identity.created_by.username
        driver = get_esh_driver(identity)
        instance = driver.get_instance(instance_id)
        if instance.extra.get('status', '') != 'active':
            celery_logger.info("prep_instance_for_snapshot skipped")
            return
        playbooks = deploy_prepare_snapshot(
            instance.ip, username, instance_id)
        celery_logger.info(playbooks.__dict__)
        hostname = build_host_name(instance.id, instance.ip)
        result = not (execution_has_failures(playbooks, hostname)
                      or execution_has_unreachable(playbooks, hostname))
        if not result:
            raise Exception(
                "Error encountered while preparing instance for snapshot: %s"
                % playbooks.stats.summarize(host=hostname))
    except Exception as exc:
        celery_logger.warn(exc)
        prep_instance_for_snapshot.retry(exc=exc)
Example #4
    def dump_logs(self, context, log_dir):
        """
        Dump logs of the workflow into the log directory provided.
        A separate log file is written for each pod/step in the workflow,
        with the filename {pod_name}.log

        Args:
            context (ArgoContext): context used to fetch the logs
            log_dir (str): directory to dump logs into
        """
        # find out which pods the workflow consists of
        json_resp = context.client().get_workflow(self.wf_name)
        pod_names = json_resp["status"]["nodes"].keys()

        # dump logs into a separate file for each pod
        for pod_name in pod_names:

            filename = "{}.log".format(pod_name)
            log_file_path = os.path.join(log_dir, filename)

            with open(log_file_path, "a+") as dump_file:
                dump_file.write("workflow {} has {} pods\n".format(
                    self.wf_name, len(pod_names)))
                logs_lines = context.client().get_log_for_pod_in_workflow(
                    self.wf_name, pod_name, container_name="main")
                dump_file.write("\npod {}:\n".format(pod_name))
                dump_file.writelines(logs_lines)
            logger.debug(
                ("ARGO, log dump for workflow {}, pod {} at: {}\n").format(
                    self.wf_name, pod_name, log_file_path))
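A hypothetical invocation of dump_logs, assuming an authenticated ArgoContext; the workflow name and directory are illustrative:

context = ArgoContext()                   # assumed constructor, as above
wf = ArgoWorkflow("wf-example-abc123")    # illustrative workflow name
wf.dump_logs(context, "/tmp/argo_logs")   # writes one {pod_name}.log per pod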
Example #5
def mount_volume_task(
    driverCls,
    provider,
    identity,
    instance_id,
    volume_id,
    device_location,
    mount_location,
    device_type,
    mount_prefix=None,
    *args,
    **kwargs
):
    try:
        celery_logger.debug("mount task started at %s." % timezone.now())
        celery_logger.debug("mount_location: %s" % (mount_location, ))
        driver = get_driver(driverCls, provider, identity)
        username = identity.get_username()
        instance = driver.get_instance(instance_id)
        volume = driver.get_volume(volume_id)

        try:
            attach_data = volume.extra['attachments'][0]
            if not device_location:
                device_location = attach_data['device']
        except (KeyError, IndexError):
            celery_logger.warn(
                "Volume %s missing attachments in Extra" % (volume, )
            )
        if not device_location:
            raise Exception(
                "No device_location found or inferred by volume %s" % volume
            )
        if not mount_prefix:
            mount_prefix = "/vol_"

        last_char = device_location[-1]    # /dev/sdb --> b
        if not mount_location:
            mount_location = mount_prefix + last_char

        playbook_results = deploy_mount_volume(
            instance.ip,
            username,
            instance.id,
            device_location,
            mount_location=mount_location,
            device_type=device_type
        )
        celery_logger.info(playbook_results)
        if execution_has_failures(
            playbook_results
        ) or execution_has_unreachable(playbook_results):
            raise Exception(
                "Error encountered while mounting volume: instance_id: {}, volume_id: {}"
                .format(instance_id, volume_id)
            )
        return mount_location
    except Exception as exc:
        celery_logger.warn(exc)
        mount_volume_task.retry(exc=exc)
Example #6
def monitor_sizes_for(provider_id, print_logs=False):
    """
    Run the set of tasks related to monitoring sizes for a provider.
    While debugging, print_logs=True can be very helpful.
    """
    from service.driver import get_admin_driver

    if print_logs:
        console_handler = _init_stdout_logging()

    provider = Provider.objects.get(id=provider_id)
    admin_driver = get_admin_driver(provider)
    # Non-End dated sizes on this provider
    db_sizes = Size.objects.filter(only_current(), provider=provider)
    all_sizes = admin_driver.list_sizes()
    seen_sizes = []
    for cloud_size in all_sizes:
        core_size = convert_esh_size(cloud_size, provider.uuid)
        seen_sizes.append(core_size)

    now_time = timezone.now()
    needs_end_date = [size for size in db_sizes if size not in seen_sizes]
    for size in needs_end_date:
        celery_logger.debug("End dating inactive size: %s" % size)
        size.end_date = now_time
        size.save()

    if print_logs:
        _exit_stdout_logging(console_handler)
Example #7
def remove_empty_networks_for(provider_id):
    provider = Provider.objects.get(id=provider_id)
    os_driver = get_account_driver(provider)
    all_instances = os_driver.admin_driver.list_all_instances()
    project_map = os_driver.network_manager.project_network_map()
    projects_with_networks = project_map.keys()
    for project in projects_with_networks:
        networks = project_map[project]['network']
        if not isinstance(networks, list):
            networks = [networks]
        for network in networks:
            network_name = network['name']
            celery_logger.debug("Checking if network %s is in use" %
                                network_name)
            if running_instances(network_name, all_instances):
                continue
            # TODO: MUST change when not using 'usergroups' explicitly.
            user = project
            try:
                celery_logger.debug(
                    "Removing project network for User:%s, Project:%s" %
                    (user, project))
                os_driver.network_manager.delete_project_network(user, project)
            except NeutronClientException:
                celery_logger.exception("Neutron unable to remove project"
                                        "network for %s-%s" % (user, project))
            except NeutronException:
                celery_logger.exception("Neutron unable to remove project"
                                        "network for %s-%s" % (user, project))
Example #8
def update_snapshot_cyverse(start_date=None, end_date=None):
    all_sources = AllocationSource.objects.order_by('name')
    n = settings.ALLOC_SNAPSHOT_SIZE
    num_sources = len(all_sources)
    if num_sources > n:
        for i in range(0, len(all_sources), n):
            logger.debug(
                "Updating {} of {} allocation sources ".format(n, num_sources)
            )
            update_snapshot_cyverse_for.apply_async(
                args=(all_sources[i:i + n], ),
                kwargs={
                    'start_date': start_date,
                    'end_date': end_date
                },
                expires=15 * 60
            )
    else:
        logger.debug(
            "Updating all {} allocation sources (snapshot size is {})".format(
                num_sources, n
            )
        )
        update_snapshot_cyverse_for(
            all_sources, start_date=start_date, end_date=end_date
        )
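The batching above walks the queryset in fixed-size slices via range(0, len(all_sources), n). The slicing logic in isolation, with illustrative values standing in for the queryset and settings:

n = 3                           # stand-in for settings.ALLOC_SNAPSHOT_SIZE
all_sources = list(range(8))    # stand-in for the AllocationSource queryset
chunks = [all_sources[i:i + n] for i in range(0, len(all_sources), n)]
print(chunks)                   # -> [[0, 1, 2], [3, 4, 5], [6, 7]]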
Example #10
def update_mount_location(new_mount_location,
                          driverCls, provider, identity,
                          volume_alias):
    """
    """
    from service import volume as volume_service
    try:
        celery_logger.debug(
            "update_mount_location task started at %s." %
            datetime.now())
        driver = get_driver(driverCls, provider, identity)
        volume = driver.get_volume(volume_alias)
        if not volume:
            return
        if not new_mount_location:
            return
        volume_metadata = volume.extra['metadata']
        result = volume_service._update_volume_metadata(
            driver, volume,
            metadata={'mount_location': new_mount_location})
        celery_logger.debug(
            "update_mount_location task finished at %s." %
            datetime.now())
        return result
    except Exception as exc:
        celery_logger.exception(exc)
        update_mount_location.retry(exc=exc)
Example #12
def monitor_sizes_for(provider_id, print_logs=False):
    """
    Run the set of tasks related to monitoring sizes for a provider.
    While debugging, print_logs=True can be very helpful.
    """
    from service.driver import get_admin_driver

    if print_logs:
        import logging
        import sys
        consolehandler = logging.StreamHandler(sys.stdout)
        consolehandler.setLevel(logging.DEBUG)
        celery_logger.addHandler(consolehandler)

    provider = Provider.objects.get(id=provider_id)
    admin_driver = get_admin_driver(provider)
    # Non-End dated sizes on this provider
    db_sizes = Size.objects.filter(only_current(), provider=provider)
    all_sizes = admin_driver.list_sizes()
    seen_sizes = []
    for cloud_size in all_sizes:
        core_size = convert_esh_size(cloud_size, provider.uuid)
        seen_sizes.append(core_size)

    now_time = timezone.now()
    needs_end_date = [size for size in db_sizes if size not in seen_sizes]
    for size in needs_end_date:
        celery_logger.debug("End dating inactive size: %s" % size)
        size.end_date = now_time
        size.save()

    if print_logs:
        celery_logger.removeHandler(consolehandler)
Example #14
def monitor_allocation_sources(usernames=()):
    """
    Monitor allocation sources, if a snapshot shows that all compute has been used, then enforce as necessary
    """
    celery_logger.debug('monitor_allocation_sources - usernames: %s',
                        usernames)
    allocation_sources = AllocationSource.objects.all()
    for allocation_source in allocation_sources.order_by('name'):
        celery_logger.debug(
            'monitor_allocation_sources - allocation_source: %s',
            allocation_source)
        for user in allocation_source.all_users.order_by('username'):
            celery_logger.debug('monitor_allocation_sources - user: %s', user)
            if usernames and user.username not in usernames:
                celery_logger.info("Skipping User %s - not in the list" %
                                   user.username)
                continue
            over_allocation = allocation_source.is_over_allocation(user)
            celery_logger.debug(
                'monitor_allocation_sources - user: %s, over_allocation: %s',
                user, over_allocation)
            if not over_allocation:
                continue
            celery_logger.debug(
                'monitor_allocation_sources - Going to enforce on user: %s',
                user)
            allocation_source_overage_enforcement_for_user.apply_async(
                args=(allocation_source, user))
Example #15
def mount_failed(task_uuid,
                 driverCls,
                 provider,
                 identity,
                 volume_id,
                 unmount=False,
                 **celery_task_args):
    from service import volume as volume_service
    try:
        celery_logger.debug("mount_failed task started at %s." %
                            datetime.now())
        celery_logger.info("task_uuid=%s" % task_uuid)
        result = app.AsyncResult(task_uuid)
        with allow_join_result():
            exc = result.get(propagate=False)
        err_str = "Mount Error Traceback:%s" % (result.traceback, )
        celery_logger.error(err_str)
        driver = get_driver(driverCls, provider, identity)
        volume = driver.get_volume(volume_id)
        if unmount:
            tmp_status = 'umount_error'
        else:
            tmp_status = 'mount_error'
        result = volume_service._update_volume_metadata(
            driver, volume, metadata={'tmp_status': tmp_status})
        celery_logger.debug("mount_failed task finished at %s." %
                            datetime.now())
        return result
    except Exception as exc:
        celery_logger.warn(exc)
        mount_failed.retry(exc=exc)
Example #16
def umount_task(driverCls, provider, identity, instance_id, volume_id, *args, **kwargs):
    try:
        celery_logger.debug("umount_task started at %s." % datetime.now())
        driver = get_driver(driverCls, provider, identity)
        instance = driver.get_instance(instance_id)
        volume = driver.get_volume(volume_id)
        attach_data = volume.extra["attachments"][0]
        device = attach_data["device"]

        # Check mount to find the mount_location for device
        private_key = "/opt/dev/atmosphere/extras/ssh/id_rsa"
        kwargs.update({"ssh_key": private_key})
        kwargs.update({"timeout": 120})

        mount_location = None
        cm_script = check_mount()
        kwargs.update({"deploy": cm_script})
        driver.deploy_to(instance, **kwargs)
        regex = re.compile("(?P<device>[\w/]+) on (?P<location>.*) type")
        for line in cm_script.stdout.split("\n"):
            res = regex.search(line)
            if not res:
                continue
            search_dict = res.groupdict()
            dev_found = search_dict["device"]
            if device == dev_found:
                mount_location = search_dict["location"]
                break

        # Volume not mounted, move along..
        if not mount_location:
            return

        um_script = umount_volume(device)
        kwargs.update({"deploy": um_script})
        driver.deploy_to(instance, **kwargs)

        if "device is busy" in um_script.stdout:
            # Show all processes that are making device busy..
            lsof_script = lsof_location(mount_location)
            kwargs.update({"deploy": lsof_script})
            driver.deploy_to(instance, **kwargs)

            regex = re.compile("(?P<name>[\w]+)\s*(?P<pid>[\d]+)")
            offending_processes = []
            for line in lsof_script.stdout.split("\n"):
                res = regex.search(line)
                if not res:
                    continue
                search_dict = res.groupdict()
                offending_processes.append((search_dict["name"], search_dict["pid"]))

            raise DeviceBusyException(mount_location, offending_processes)
        # Return here if no errors occurred..
        celery_logger.debug("umount_task finished at %s." % datetime.now())
    except DeviceBusyException:
        raise
    except Exception as exc:
        celery_logger.warn(exc)
        umount_task.retry(exc=exc)
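The mount-table parsing in umount_task can be checked in isolation. The sample line below mimics typical `mount` output and is illustrative, not from the source:

import re

regex = re.compile(r"(?P<device>[\w/]+) on (?P<location>.*) type")
line = "/dev/vdb on /vol_b type ext4 (rw,relatime)"    # illustrative mount output
match = regex.search(line)
print(match.groupdict())    # -> {'device': '/dev/vdb', 'location': '/vol_b'}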
Example #17
def unmount_volume_task(driverCls, provider, identity, instance_id, volume_id,
                        *args, **kwargs):
    try:
        celery_logger.debug("unmount task started at %s." % timezone.now())
        driver = get_driver(driverCls, provider, identity)
        username = identity.get_username()
        instance = driver.get_instance(instance_id)
        volume = driver.get_volume(volume_id)
        device_location = None

        try:
            attach_data = volume.extra['attachments'][0]
            device_location = attach_data['device']
        except (KeyError, IndexError):
            celery_logger.warn("Volume %s missing attachments in Extra" %
                               (volume, ))
        if not device_location:
            raise Exception(
                "No device_location found or inferred by volume %s" % volume)
        try:
            playbooks = deploy_unmount_volume(instance.ip, username,
                                              instance.id, device_location)
        except DeviceBusyException:
            # Future-Fixme: Update VolumeStatusHistory.extra, set status to 'unmount_failed'
            raise
        hostname = build_host_name(instance.id, instance.ip)
        result = not (execution_has_failures(playbooks, hostname)
                      or execution_has_unreachable(playbooks, hostname))
        if not result:
            raise Exception("Error encountered while unmounting volume: %s" %
                            playbooks.stats.summarize(host=hostname))
        return device_location
    except Exception as exc:
        celery_logger.warn(exc)
        unmount_volume_task.retry(exc=exc)
Example #18
def update_snapshot_cyverse(start_date=None, end_date=None):
    logger.debug("update_snapshot_cyverse task started at %s." %
                 datetime.now())
    end_date = timezone.now().replace(
        microsecond=0) if not end_date else end_date

    for allocation_source in AllocationSource.objects.order_by('name'):
        # calculate and save snapshots here
        allocation_source_name = allocation_source.name
        last_renewal_event = EventTable.objects.filter(
            name='allocation_source_created_or_renewed',
            payload__allocation_source_name__exact=str(
                allocation_source_name)).order_by('timestamp')

        if not last_renewal_event:
            logger.info('Allocation Source %s Create/Renewal event missing',
                        allocation_source_name)
            continue

        start_date = last_renewal_event.last().timestamp.replace(
            microsecond=0) if not start_date else start_date

        total_compute_used = 0
        total_burn_rate = 0
        for user in allocation_source.all_users:
            compute_used, burn_rate = total_usage(
                user.username,
                start_date=start_date,
                end_date=end_date,
                allocation_source_name=allocation_source_name,
                burn_rate=True)

            UserAllocationSnapshot.objects.update_or_create(
                allocation_source=allocation_source,
                user=user,
                defaults={
                    'compute_used': compute_used,
                    'burn_rate': burn_rate
                })
            total_compute_used += compute_used
            total_burn_rate += burn_rate
        AllocationSourceSnapshot.objects.update_or_create(
            allocation_source=allocation_source,
            defaults={
                'compute_used': total_compute_used,
                'global_burn_rate': total_burn_rate
            })

        run_all(
            rule_list=cyverse_rules,
            defined_variables=CyverseTestRenewalVariables(
                allocation_source, end_date, start_date),
            defined_actions=CyverseTestRenewalActions(allocation_source,
                                                      end_date),
        )
    # At the end of the task, fire-off an allocation threshold check
    logger.debug("update_snapshot_cyverse task finished at %s." %
                 datetime.now())
    allocation_threshold_check.apply_async()
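The snapshot writes above lean on Django's update_or_create: the keyword lookups identify the row, and defaults holds the fields written whether the row is created or updated. A plain-Python sketch of those semantics, with illustrative keys and values:

def update_or_create(table, lookup, defaults):
    # dict-based sketch of the ORM call: 'lookup' finds the row, 'defaults' is written either way
    key = tuple(sorted(lookup.items()))
    created = key not in table
    row = table.setdefault(key, dict(lookup))
    row.update(defaults)
    return row, created

table = {}
row, created = update_or_create(
    table,
    lookup={'allocation_source': 'TG-ABC', 'user': 'alice'},    # illustrative
    defaults={'compute_used': 12.5, 'burn_rate': 0.25})
print(created, row['compute_used'])    # -> True 12.5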
Example #19
def add_membership_task(image_version, group):
    celery_logger.debug("add_membership_task task started at %s." % timezone.now())
    try:
        add_membership(image_version, group)
        celery_logger.debug("add_membership_task task finished at %s." % timezone.now())
    except Exception as exc:
        celery_logger.exception(exc)
        add_membership_task.retry(exc=exc)
Example #20
def monitor_instances_for(provider_id, users=None,
                          print_logs=False, check_allocations=False, start_date=None, end_date=None):
    """
    Run the set of tasks related to monitoring instances for a provider.
    Optionally, provide a list of usernames to monitor
    While debugging, print_logs=True can be very helpful.
    start_date and end_date allow you to search a 'non-standard' window of time.
    """
    provider = Provider.objects.get(id=provider_id)

    # For now, let's just ignore everything that isn't openstack.
    if 'openstack' not in provider.type.name.lower():
        return

    instance_map = _get_instance_owner_map(provider, users=users)

    if print_logs:
        console_handler = _init_stdout_logging()

    # DEVNOTE: Potential slowdown running multiple functions
    # Break this out when instance-caching is enabled
    running_total = 0
    if not settings.ENFORCING:
        celery_logger.debug('Settings dictate allocations are NOT enforced')
    for username in sorted(instance_map.keys()):
        running_instances = instance_map[username]
        running_total += len(running_instances)
        identity = _get_identity_from_tenant_name(provider, username)
        if identity and running_instances:
            try:
                driver = get_cached_driver(identity=identity)
                core_running_instances = [
                    convert_esh_instance(
                        driver,
                        inst,
                        identity.provider.uuid,
                        identity.uuid,
                        identity.created_by) for inst in running_instances]
            except Exception as exc:
                celery_logger.exception(
                    "Could not convert running instances for %s" %
                    username)
                continue
        else:
            # No running instances.
            core_running_instances = []
        # Using the 'known' list of running instances, cleanup the DB
        core_instances = _cleanup_missing_instances(
            identity,
            core_running_instances)
        if check_allocations:
            allocation_result = user_over_allocation_enforcement(
                provider, username,
                print_logs, start_date, end_date)
    if print_logs:
        _exit_stdout_logging(console_handler)
    return running_total
Example #22
def running_instances(network_name, all_instances):
    for instance in all_instances:
        if network_name in instance.extra['addresses'].keys():
            # If not build/active, the network is assumed to be NOT in use
            celery_logger.debug("Network %s is in use, Active Instance:%s" %
                                (network_name, instance.id))
            return True
    celery_logger.debug("Network %s is NOT in use" % network_name)
    return False
Example #25
def monitor_sizes_for(provider_id, print_logs=False):
    """
    Run the set of tasks related to monitoring sizes for a provider.
    While debugging, print_logs=True can be very helpful.
    """
    from service.driver import get_admin_driver

    if print_logs:
        console_handler = _init_stdout_logging()

    provider = Provider.objects.get(id=provider_id)
    admin_driver = get_admin_driver(provider)
    # Non-End dated sizes on this provider
    db_sizes = Size.objects.filter(only_current(), provider=provider)
    all_sizes = admin_driver.list_sizes()
    seen_sizes = []
    for cloud_size in all_sizes:
        core_size = convert_esh_size(cloud_size, provider.uuid)
        seen_sizes.append(core_size)

    now_time = timezone.now()
    needs_end_date = [size for size in db_sizes if size not in seen_sizes]
    for size in needs_end_date:
        celery_logger.debug("End dating inactive size: %s" % size)
        size.end_date = now_time
        size.save()

    # Find home for 'Unknown Size'
    unknown_sizes = Size.objects.filter(
        provider=provider, name__contains='Unknown Size'
    )
    for size in unknown_sizes:
        # Lookup sizes may not show up in 'list_sizes'
        if size.alias == 'N/A':
            continue    # This is a sentinel value added for a separate purpose.
        try:
            libcloud_size = admin_driver.get_size(
                size.alias, forced_lookup=True
            )
        except BaseHTTPError as error:
            if error.code == 404:
                # The size may have been truly deleted
                continue
            # other HTTP errors were silently swallowed here, leaving
            # libcloud_size unbound below; re-raise them instead
            raise
        if not libcloud_size:
            continue
        cloud_size = OSSize(libcloud_size)
        core_size = convert_esh_size(cloud_size, provider.uuid)

    if print_logs:
        _exit_stdout_logging(console_handler)
    for size in seen_sizes:
        size.esh = None
    return seen_sizes
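The monitor_* tasks share one reconciliation idiom: anything in the database that the cloud no longer reports is end-dated rather than deleted. Reduced to a self-contained sketch; the record shape and ids are illustrative:

from datetime import datetime, timezone

def end_date_missing(db_records, seen_ids):
    # return the records to end-date: those no longer seen in the cloud
    now = datetime.now(timezone.utc)
    stale = [r for r in db_records if r["id"] not in seen_ids]
    for record in stale:
        record["end_date"] = now    # the real tasks call .save() at this point
    return stale

db = [{"id": "m1.small", "end_date": None}, {"id": "m1.gone", "end_date": None}]
print([r["id"] for r in end_date_missing(db, {"m1.small"})])    # -> ['m1.gone']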
Example #26
def monitor_volumes_for(provider_id, print_logs=False):
    """
    Run the set of tasks related to monitoring volumes for a provider.
    While debugging, print_logs=True can be very helpful.
    """
    from service.driver import get_account_driver
    from core.models import Identity
    if print_logs:
        console_handler = _init_stdout_logging()

    provider = Provider.objects.get(id=provider_id)
    account_driver = get_account_driver(provider)
    # Non-End dated volumes on this provider
    db_volumes = Volume.objects.filter(only_current_source(), instance_source__provider=provider)
    all_volumes = account_driver.admin_driver.list_all_volumes(timeout=30)
    seen_volumes = []
    for cloud_volume in all_volumes:
        try:
            core_volume = convert_esh_volume(cloud_volume, provider_uuid=provider.uuid)
            seen_volumes.append(core_volume)
        except ObjectDoesNotExist:
            tenant_id = cloud_volume.extra['object']['os-vol-tenant-attr:tenant_id']
            tenant = account_driver.get_project_by_id(tenant_id)
            tenant_name = tenant.name if tenant else tenant_id
            try:
                if not tenant:
                    celery_logger.warn("Warning: tenant_id %s found on volume %s, but did not exist from the account driver perspective.", tenant_id, cloud_volume)
                    raise ObjectDoesNotExist()
                identity = Identity.objects.filter(
                    contains_credential('ex_project_name', tenant_name), provider=provider
                ).first()
                if not identity:
                    raise ObjectDoesNotExist()
                core_volume = convert_esh_volume(
                    cloud_volume,
                    provider.uuid, identity.uuid,
                    identity.created_by)
            except ObjectDoesNotExist:
                celery_logger.info("Skipping Volume %s - No Identity for: Provider:%s + Project Name:%s" % (cloud_volume.id, provider, tenant_name))
            pass

    now_time = timezone.now()
    needs_end_date = [volume for volume in db_volumes if volume not in seen_volumes]
    for volume in needs_end_date:
        celery_logger.debug("End dating inactive volume: %s" % volume)
        volume.end_date = now_time
        volume.save()

    if print_logs:
        _exit_stdout_logging(console_handler)
    for vol in seen_volumes:
        vol.esh = None
    return [vol.instance_source.identifier for vol in seen_volumes]
Example #28
def monitor_instances_for(
    provider_id, users=None, print_logs=False, start_date=None, end_date=None
):
    """
    Run the set of tasks related to monitoring instances for a provider.
    Optionally, provide a list of usernames to monitor
    While debugging, print_logs=True can be very helpful.
    start_date and end_date allow you to search a 'non-standard' window of time.
    """
    provider = Provider.objects.get(id=provider_id)

    # For now, let's just ignore everything that isn't openstack.
    if 'openstack' not in provider.type.name.lower():
        return
    instance_map = _get_instance_owner_map(provider, users=users)

    if print_logs:
        console_handler = _init_stdout_logging()
    seen_instances = []
    # DEVNOTE: Potential slowdown running multiple functions
    # Break this out when instance-caching is enabled
    if not settings.ENFORCING:
        celery_logger.debug('Settings dictate allocations are NOT enforced')
    for tenant_name in sorted(instance_map.keys()):
        running_instances = instance_map[tenant_name]
        identity = _get_identity_from_tenant_name(provider, tenant_name)
        if identity and running_instances:
            try:
                driver = get_cached_driver(identity=identity)
                core_running_instances = [
                    convert_esh_instance(
                        driver, inst, identity.provider.uuid, identity.uuid,
                        identity.created_by
                    ) for inst in running_instances
                ]
                seen_instances.extend(core_running_instances)
            except Exception:
                celery_logger.exception(
                    "Could not convert running instances for %s" % tenant_name
                )
                continue
        else:
            # No running instances.
            core_running_instances = []
        # Using the 'known' list of running instances, cleanup the DB
        _cleanup_missing_instances(identity, core_running_instances)
    if print_logs:
        _exit_stdout_logging(console_handler)
    # return seen_instances  NOTE: this has been commented out to avoid PicklingError!
    # TODO: Uncomment the above, Determine what _we can return_ and return that instead....
    return
Example #29
def allocation_source_overage_enforcement_for_user(allocation_source, user):
    celery_logger.debug('allocation_source_overage_enforcement_for_user - allocation_source: %s, user: %s',
                        allocation_source, user)
    user_instances = []
    for identity in user.current_identities:
        try:
            celery_logger.debug('allocation_source_overage_enforcement_for_user - identity: %s', identity)
            affected_instances = allocation_source_overage_enforcement_for(allocation_source, user, identity)
            user_instances.extend(affected_instances)
        except Exception:
            celery_logger.exception(
                'allocation_source_overage_enforcement_for allocation_source: %s, user: %s, and identity: %s',
                allocation_source, user, identity)
    return user_instances
Example #31
def update_volume_metadata(driverCls, provider, identity, volume_alias, metadata):
    """
    """
    from service import volume as volume_service

    try:
        celery_logger.debug("update_volume_metadata task started at %s." % datetime.now())
        driver = get_driver(driverCls, provider, identity)
        volume = driver.get_volume(volume_alias)
        if not volume:
            return
        result = volume_service.update_volume_metadata(driver, volume, metadata=metadata)
        celery_logger.debug("update_volume_metadata task finished at %s." % datetime.now())
        return result
    except Exception as exc:
        celery_logger.exception(exc)
        update_volume_metadata.retry(exc=exc)
Example #32
def remove_empty_networks_for(provider_id):
    provider = Provider.objects.get(id=provider_id)
    os_driver = get_account_driver(provider)
    if not os_driver:
        celery_logger.warn(
            "Cannot remove_empty_networks_for provider %s -- Account Driver not created"
            % provider)
        return
    all_instances = os_driver.admin_driver.list_all_instances()
    project_map = os_driver.network_manager.project_network_map()
    known_project_names = Credential.objects.filter(
        key='ex_project_name').values_list('value', flat=True)
    projects_with_networks = sorted(
        [k for k in project_map.keys() if k in known_project_names])
    for project in projects_with_networks:
        networks = project_map[project]['network']
        if not isinstance(networks, list):
            networks = [networks]
        for network in networks:
            network_name = network['name']
            celery_logger.debug("Checking if network %s is in use" %
                                network_name)
            if running_instances(network_name, all_instances):
                continue
            user = project
            identity = Identity.objects.filter(
                provider_id=provider_id,
                credential__key='ex_project_name',
                credential__value=project).filter(
                    credential__key='key', credential__value=user).first()
            if not identity:
                celery_logger.warn(
                    "NOT Removing project network for User:%s, Project:%s -- No Valid Identity found!"
                    % (user, project))
                continue
            try:
                celery_logger.debug(
                    "Removing project network for User:%s, Project:%s" %
                    (user, project))
                os_driver.delete_user_network(identity)
            except NeutronClientException:
                celery_logger.exception("Neutron unable to remove project"
                                        "network for %s-%s" % (user, project))
            except NeutronException:
                celery_logger.exception("Neutron unable to remove project"
                                        "network for %s-%s" % (user, project))
Example #33
def check_volume_task(driverCls, provider, identity, instance_id, volume_id,
                      *args, **kwargs):
    try:
        celery_logger.debug("check_volume task started at %s." %
                            datetime.now())
        driver = get_driver(driverCls, provider, identity)
        instance = driver.get_instance(instance_id)
        volume = driver.get_volume(volume_id)
        attach_data = volume.extra['attachments'][0]
        device = attach_data['device']

        private_key = ATMOSPHERE_PRIVATE_KEYFILE
        kwargs.update({'ssh_key': private_key})
        kwargs.update({'timeout': 120})

        # One script to make two checks:
        # 1. Volume exists 2. Volume has a filesystem
        cv_script = check_volume(device)
        # NOTE: non_zero_deploy needed to stop LibcloudDeploymentError from being
        # raised
        kwargs.update({'deploy': cv_script, 'non_zero_deploy': True})
        driver.deploy_to(instance, **kwargs)
        kwargs.pop('non_zero_deploy', None)
        # script has executed; inspect the exit status below

        if cv_script.exit_status != 0:
            if 'No such file' in cv_script.stdout:
                raise Exception('Volume check failed: %s. '
                                'Device %s does not exist on instance %s' %
                                (volume, device, instance))
            elif 'Bad magic number' in cv_script.stdout:
                # Filesystem needs to be created for this device
                celery_logger.info("Mkfs needed")
                mkfs_script = mkfs_volume(device)
                kwargs.update({'deploy': mkfs_script})
                driver.deploy_to(instance, **kwargs)
            else:
                raise Exception('Volume check failed: Something weird')

        celery_logger.debug("check_volume task finished at %s." %
                            datetime.now())
    except LibcloudDeploymentError as exc:
        celery_logger.exception(exc)
    except Exception as exc:
        celery_logger.warn(exc)
        check_volume_task.retry(exc=exc)
Example #35
def check_volume_task(
    driverCls,
    provider,
    identity,
    instance_id,
    volume_id,
    device_type='ext4',
    *args,
    **kwargs
):
    try:
        celery_logger.debug("check_volume task started at %s." % timezone.now())
        driver = get_driver(driverCls, provider, identity)
        instance = driver.get_instance(instance_id)
        volume = driver.get_volume(volume_id)
        username = identity.get_username()
        attach_data = volume.extra['attachments'][0]
        device_location = attach_data['device']
        celery_logger.info("device_location: %s" % device_location)

        # One playbook to make two checks:
        # 1. Volume exists
        # 2. Volume has a filesystem
        #    (If not, create one of type 'device_type')
        playbook_results = deploy_check_volume(
            instance.ip,
            username,
            instance.id,
            device_location,
            device_type=device_type
        )
        success = not (
            execution_has_failures(playbook_results)
            or execution_has_unreachable(playbook_results)
        )
        if not success:
            raise Exception(
                "Error encountered while checking volume for filesystem: instance_id: {}, volume_id: {}"
                .format(instance_id, volume_id)
            )
        return success
    except Exception as exc:
        celery_logger.warn(exc)
        check_volume_task.retry(exc=exc)
Example #36
    def dump_pod_logs(self, context, pod_name, log_file_path):
        """
        Dump logs of a pod in the workflow into a log file at the given path.
        Technically this is a node name; the method and its pod_name argument
        are named this way only to match the URL in the Swagger doc.

        Args:
            context (ArgoContext): context used to fetch the logs
            pod_name (str): name of the pod
            log_file_path (str): path to the log file
        """
        # fetch the pod's logs and append them to the log file
        with open(log_file_path, "a+") as log_file:
            logs_lines = context.client().get_log_for_pod_in_workflow(
                self.wf_name, pod_name, container_name="main")
            log_file.write("\n".join(logs_lines))
        logger.debug(
            ("ARGO, log dump for workflow {}, pod {} at: {}\n").format(
                self.wf_name, pod_name, log_file_path))
Example #37
def check_volume_task(driverCls,
                      provider,
                      identity,
                      instance_id,
                      volume_id,
                      device_type='ext4',
                      *args,
                      **kwargs):
    try:
        celery_logger.debug("check_volume task started at %s." %
                            datetime.now())
        driver = get_driver(driverCls, provider, identity)
        instance = driver.get_instance(instance_id)
        volume = driver.get_volume(volume_id)
        username = identity.get_username()
        attach_data = volume.extra['attachments'][0]
        device_location = attach_data['device']
        celery_logger.info("device_location: %s" % device_location)

        # One playbook to make two checks:
        # 1. Volume exists
        # 2. Volume has a filesystem
        #    (If not, create one of type 'device_type')
        playbooks = deploy_check_volume(instance.ip,
                                        username,
                                        instance.id,
                                        device_location,
                                        device_type=device_type)
        celery_logger.info(playbooks.__dict__)
        hostname = build_host_name(instance.id, instance.ip)
        result = not (execution_has_failures(playbooks, hostname)
                      or execution_has_unreachable(playbooks, hostname))
        if not result:
            raise Exception(
                "Error encountered while checking volume for filesystem: %s" %
                playbooks.stats.summarize(host=hostname))
        return result
    except LibcloudDeploymentError as exc:
        celery_logger.exception(exc)
    except Exception as exc:
        celery_logger.warn(exc)
        check_volume_task.retry(exc=exc)
Example #38
def update_volume_metadata(
    driverCls, provider, identity, volume_alias, metadata
):
    """
    """
    from service import volume as volume_service
    try:
        celery_logger.debug(
            "update_volume_metadata task started at %s." % timezone.now()
        )
        driver = get_driver(driverCls, provider, identity)
        volume = driver.get_volume(volume_alias)
        if not volume:
            return
        return volume_service._update_volume_metadata(
            driver, volume, metadata=metadata
        )
    except Exception as exc:
        celery_logger.exception(exc)
        update_volume_metadata.retry(exc=exc)
Example #39
    def create(context, wf_def, wf_data={}, lint=False):
        """
        Create a running workflow

        Args:
            context (ArgoContext): context to execute the workflow in
            wf_def (dict): workflow definition
            wf_data (dict, optional): workflow data to be passed along. Defaults to {}.
            lint (bool, optional): Whether to submit workflow definition for
                linting first. Defaults to False.

        Returns:
            ArgoWorkflow: ArgoWorkflow object created based on the returned json
        """
        if wf_data:
            wf_def = _populate_wf_data(wf_def, wf_data)

        json_resp = context.client().run_workflow(wf_def)
        wf_name = json_resp["metadata"]["name"]
        logger.debug("ARGO, workflow {} created".format(wf_name))
        return ArgoWorkflow(wf_name)
Example #40
def unmount_volume_task(
    driverCls, provider, identity, instance_id, volume_id, *args, **kwargs
):
    try:
        celery_logger.debug("unmount task started at %s." % timezone.now())
        driver = get_driver(driverCls, provider, identity)
        username = identity.get_username()
        instance = driver.get_instance(instance_id)
        volume = driver.get_volume(volume_id)
        device_location = None

        try:
            attach_data = volume.extra['attachments'][0]
            device_location = attach_data['device']
        except (KeyError, IndexError):
            celery_logger.warn(
                "Volume %s missing attachments in Extra" % (volume, )
            )
        if not device_location:
            raise Exception(
                "No device_location found or inferred by volume %s" % volume
            )
        try:
            playbook_results = deploy_unmount_volume(
                instance.ip, username, instance.id, device_location
            )
        except DeviceBusyException:
            # Future-Fixme: Update VolumeStatusHistory.extra, set status to 'unmount_failed'
            raise
        if execution_has_failures(
            playbook_results
        ) or execution_has_unreachable(playbook_results):
            raise Exception(
                "Error encountered while unmounting volume: instance_id: {}, volume_id: {}"
                .format(instance_id, volume_id)
            )
        return device_location
    except Exception as exc:
        celery_logger.warn(exc)
        unmount_volume_task.retry(exc=exc)
Example #41
def allocation_threshold_check():
    logger.debug("allocation_threshold_check task started at %s." % datetime.now())
    if not settings.CHECK_THRESHOLD:
        logger.debug("CHECK_THRESHOLD is FALSE -- allocation_threshold_check task finished at %s." % datetime.now())
        return

    THRESHOLDS = [50.0, 90.0]
    for allocation_source in AllocationSource.objects.filter(compute_allowed__gte=0).all():
        snapshot = allocation_source.snapshot
        if not snapshot.compute_allowed:
            # Guard against division by zero for sources with no compute allowed
            continue
        percentage_used = (snapshot.compute_used / snapshot.compute_allowed) * 100
        # check whether the percentage crosses a threshold
        for threshold in THRESHOLDS:
            if percentage_used > threshold:
                compute_used = snapshot.compute_used
                allocation_source_name = allocation_source.name

                # check if event has been fired
                prev_event = EventTable.objects.filter(name='allocation_source_threshold_met',
                                                       payload__allocation_source_name=allocation_source_name,
                                                       payload__threshold=threshold).last()
                if prev_event:
                    continue

                payload = {
                    'allocation_source_name': allocation_source_name,
                    'threshold': threshold,
                    'usage_percentage': float(percentage_used)
                }

                EventTable.objects.create(
                    name='allocation_source_threshold_met',
                    payload=payload,
                    entity_id=payload['allocation_source_name'])
                break
    logger.debug("allocation_threshold_check task finished at %s." % datetime.now())
Example #42
def update_snapshot_cyverse(start_date=None, end_date=None):
    logger.debug("update_snapshot_cyverse task started at %s." % datetime.now())
    end_date = timezone.now().replace(microsecond=0) if not end_date else end_date

    for allocation_source in AllocationSource.objects.order_by('name'):
        # calculate and save snapshots here
        allocation_source_name = allocation_source.name
        renewal_events = EventTable.objects.filter(
            name='allocation_source_created_or_renewed',
            payload__allocation_source_name__exact=str(allocation_source_name)).order_by('timestamp')

        if not renewal_events:
            logger.info('Allocation Source %s Create/Renewal event missing', allocation_source_name)
            continue

        # Use a per-source start date so one source's renewal date does not
        # leak into the next iteration when start_date was not supplied.
        source_start_date = start_date if start_date else renewal_events.last().timestamp.replace(microsecond=0)

        total_compute_used = 0
        total_burn_rate = 0
        for user in allocation_source.all_users:
            compute_used, burn_rate = total_usage(user.username, start_date=source_start_date,
                                                  end_date=end_date, allocation_source_name=allocation_source_name,
                                                  burn_rate=True)

            UserAllocationSnapshot.objects.update_or_create(allocation_source=allocation_source, user=user,
                                                            defaults={'compute_used': compute_used,
                                                                      'burn_rate': burn_rate})
            total_compute_used += compute_used
            total_burn_rate += burn_rate
        AllocationSourceSnapshot.objects.update_or_create(allocation_source=allocation_source,
                                                          defaults={'compute_used': total_compute_used,
                                                                    'global_burn_rate': total_burn_rate})

        run_all(rule_list=cyverse_rules,
                defined_variables=CyverseTestRenewalVariables(allocation_source, current_time=end_date,
                                                              last_renewal_event_date=source_start_date),
                defined_actions=CyverseTestRenewalActions(allocation_source, current_time=end_date))
    # At the end of the task, fire-off an allocation threshold check
    logger.debug("update_snapshot_cyverse task finished at %s." % datetime.now())
    allocation_threshold_check.apply_async()
Example #43
def mount_failed(task_uuid, driverCls, provider, identity, volume_id, unmount=False, **celery_task_args):
    from service import volume as volume_service

    try:
        celery_logger.debug("mount_failed task started at %s." % datetime.now())
        celery_logger.info("task_uuid=%s" % task_uuid)
        result = app.AsyncResult(task_uuid)
        with allow_join_result():
            failure = result.get(propagate=False)
        err_str = "%s\nMount Error Traceback:%s" % (failure, result.traceback)
        celery_logger.error(err_str)
        driver = get_driver(driverCls, provider, identity)
        volume = driver.get_volume(volume_id)
        if unmount:
            tmp_status = "umount_error"
        else:
            tmp_status = "mount_error"
        celery_logger.debug("mount_failed task finished at %s." % datetime.now())
        return volume_service.update_volume_metadata(driver, volume, metadata={"tmp_status": tmp_status})
    except Exception as exc:
        celery_logger.warn(exc)
        mount_failed.retry(exc=exc)
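The task_uuid / AsyncResult pattern suggests this variant of mount_failed is attached as a Celery error callback: when a signature is passed via link_error, Celery invokes it with the failed task's id as its first argument, which is exactly what task_uuid receives. A sketch of that wiring, under that assumption:

# Hypothetical wiring; Celery prepends the failed task's id to the
# errback's arguments, matching the task_uuid parameter above.
mount_task.apply_async(
    args=(driverCls, provider, identity, instance_id, volume_id),
    link_error=mount_failed.s(driverCls, provider, identity, volume_id),
)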
Example #44
def _dump_deploy_logs(wf, username, instance_uuid):
    """
    Dump workflow logs locally

    Args:
        wf (ArgoWorkflow): workflow to dump logs of
        username (str): username of owner of the instance
        instance_uuid (str): uuid of the instance
    """
    try:
        context = argo_context_from_config(settings.ARGO_CONFIG_FILE_PATH)

        timestamp = time.strftime("%Y-%m-%d_%H%M%S", time.localtime())
        log_dir = _create_deploy_log_dir(username, instance_uuid, timestamp)

        # fetch all info about pods in workflow
        nodes = wf.get_nodes(context)

        for node_name, node in nodes.items():
            playbook_name = None
            # try finding playbook filename from parameters
            if "inputs" in node and "parameters" in node["inputs"]:
                for param in node["inputs"]["parameters"]:
                    if param["name"] == "playbook":
                        playbook_name = os.path.basename(param["value"])
                        break
            if playbook_name:
                log_filename = os.path.join(log_dir, playbook_name + ".log")
            else:
                # uses node name if playbook filename is not found
                log_filename = os.path.join(log_dir, node_name + ".log")
            wf.dump_pod_logs(context, node_name, log_filename)
    except Exception as exc:
        celery_logger.debug(
            "ARGO, failed to dump logs for workflow {}, {}".format(
                wf.wf_name, type(exc)))
        celery_logger.debug(exc)
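_create_deploy_log_dir is referenced but not shown here. A plausible sketch, assuming it simply builds and creates a per-user, per-instance, per-timestamp directory under a configured log root (ARGO_WORKFLOW_LOG_DIR is an assumed setting name):

import os

def _create_deploy_log_dir(username, instance_uuid, timestamp):
    # Hypothetical helper; the real directory layout may differ.
    log_dir = os.path.join(
        settings.ARGO_WORKFLOW_LOG_DIR, username, instance_uuid, timestamp
    )
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    return log_dir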
Example #45
def attach_task(driverCls, provider, identity, instance_id, volume_id,
                device_choice=None, *args, **kwargs):
    try:
        celery_logger.debug("attach_task started at %s." % datetime.now())
        driver = get_driver(driverCls, provider, identity)
        instance = driver.get_instance(instance_id)
        volume = driver.get_volume(volume_id)
        # Step 1. Attach the volume
        # NOTE: device_choice is not guaranteed to match the actual device
        driver.attach_volume(instance,
                             volume,
                             device_choice)

        # When the result returns the volume will be 'attaching'
        # We can't do anything until the volume is 'available/in-use'
        attempts = 0
        while True:
            volume = driver.get_volume(volume_id)
            # Give up if you can't find the volume
            if not volume:
                return None
            if attempts > 6:  # Give up after 7 back-off sleeps (~4 min total)
                break
            # Openstack and Eucalyptus both report an 'attaching' status
            if isinstance(driver, (OSDriver, EucaDriver)) and\
                    'attaching' not in volume.extra.get('status', ''):
                break
            # Exponential backoff..
            attempts += 1
            sleep_time = 2**attempts
            celery_logger.debug("Volume %s is not ready (%s). Sleep for %s"
                         % (volume.id, volume.extra.get('status', 'no-status'),
                            sleep_time))
            time.sleep(sleep_time)

        if 'available' in volume.extra.get('status', ''):
            raise Exception("Volume %s failed to attach to instance %s"
                            % (volume.id, instance.id))

        # Device path for euca == openstack
        try:
            attach_data = volume.extra['attachments'][0]
            device = attach_data['device']
        except (IndexError, KeyError):
            celery_logger.warn("Could not find 'device' in "
                        "volume.extra['attachments']: "
                        "Volume:%s Extra:%s" % (volume.id, volume.extra))
            device = None

        celery_logger.debug("attach_task finished at %s." % datetime.now())
        return device
    except Exception as exc:
        celery_logger.warn(exc)
        attach_task.retry(exc=exc)
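For reference, the polling loop above sleeps 2**attempts seconds per retry and only gives up once attempts exceeds 6, so the worst case waits 2 + 4 + ... + 128 = 254 seconds, roughly four minutes. A quick check of that arithmetic:

# Sum of the sleep intervals in the back-off loop above.
total_sleep = sum(2 ** attempts for attempts in range(1, 8))
assert total_sleep == 254  # ~4 minutes before the task gives up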
Example #46
def detach_task(
    driverCls, provider, identity, instance_id, volume_id, *args, **kwargs
):
    try:
        celery_logger.debug("detach_task started at %s." % timezone.now())
        driver = get_driver(driverCls, provider, identity)
        instance = driver.get_instance(instance_id)
        volume = driver.get_volume(volume_id)

        driver.detach_volume(volume)
        # When the result returns the volume will be 'detaching'
        # We will ensure the volume does not return to 'in-use'
        attempts = 0
        while True:
            volume = driver.get_volume(volume_id)
            if attempts > 6:    # Give up after 7 back-off sleeps (~4 min total)
                break
            # The Openstack way
            if isinstance(driver, OSDriver)\
                    and 'detaching' not in volume.extra['status']:
                break
            # The Eucalyptus way
            attachments = volume.extra.get('attachments') or []
            attach_data = attachments[0] if attachments else None
            if isinstance(driver, EucaDriver) and attach_data\
                    and 'detaching' not in attach_data.get('status', ''):
                break
            # Exponential backoff..
            attempts += 1
            sleep_time = 2**attempts
            celery_logger.debug(
                "Volume %s is not ready (%s). Sleep for %s" %
                (volume.id, volume.extra['status'], sleep_time)
            )
            time.sleep(sleep_time)

        if 'in-use' in volume.extra['status']:
            raise Exception(
                "Failed to detach Volume %s to instance %s" %
                (volume, instance)
            )

        celery_logger.debug("detach_task finished at %s." % timezone.now())
    except DeviceBusyException:
        # We should NOT retry if the device is busy
        raise
    except Exception as exc:
        # If the volume is NOT attached, do not retry.
        if 'Volume is not attached' in str(exc):
            return
        celery_logger.exception(exc)
        detach_task.retry(exc=exc)
Example #47
def attach_task(
    driverCls,
    provider,
    identity,
    instance_id,
    volume_id,
    device_choice=None,
    *args,
    **kwargs
):
    celery_logger.debug("attach_task started at %s." % timezone.now())
    driver = get_driver(driverCls, provider, identity)
    from service.volume import attach_volume
    attach_volume(driver, instance_id, volume_id, device_choice=device_choice)

    attempts = 0
    while True:
        volume = driver.get_volume(volume_id)
        assert volume, "Volume ({}) does not exist".format(volume_id)

        volume_status = volume.extra.get('status', '')
        if volume_status == "in-use":
            break

        if attempts > 4:
            raise Exception(
                "Attach task timed out for volume {} and instance {}, volume status: {}"
                .format(volume_id, instance_id, volume_status)
            )

        celery_logger.debug(
            "Volume {} is not ready. Expected 'in-use', got '{}'".format(
                volume_id, volume_status
            )
        )
        time.sleep(10)
        attempts += 1

    try:
        attach_data = volume.extra['attachments'][0]
        device = attach_data['device']
    except (IndexError, KeyError):
        raise Exception(
            "Could not find 'device' in volume.extra {}".format(volume.extra)
        )

    celery_logger.debug("attach_task finished at %s." % timezone.now())
    return device
Example #48
def remove_empty_networks():
    celery_logger.debug("remove_empty_networks task started at %s." %
                 datetime.now())
    for provider in Provider.get_active(type_name='openstack'):
        remove_empty_networks_for.apply_async(args=[provider.id])
Example #49
def mount_task(driverCls, provider, identity, instance_id, volume_id,
               device=None, mount_location=None, *args, **kwargs):
    try:
        celery_logger.debug("mount task started at %s." % datetime.now())
        celery_logger.debug("mount_location: %s" % (mount_location, ))
        driver = get_driver(driverCls, provider, identity)
        instance = driver.get_instance(instance_id)
        volume = driver.get_volume(volume_id)

        username = identity.get_username()
        # DEV NOTE: Set as 'users' because this is a GUARANTEED group
        # and we know our 'user' will exist (if atmo_init_full was executed)
        # in case the VM does NOT rely on iPlant LDAP
        groupname = "users"

        celery_logger.debug(volume)
        try:
            attach_data = volume.extra['attachments'][0]
            if not device:
                device = attach_data['device']
        except (KeyError, IndexError):
            celery_logger.warn("Volume %s missing attachments in Extra"
                        % (volume,))
            device = None
        if not device:
            celery_logger.warn("Device never attached. Nothing to mount")
            return None

        private_key = "/opt/dev/atmosphere/extras/ssh/id_rsa"
        kwargs.update({'ssh_key': private_key})
        kwargs.update({'timeout': 120})

        # Step 2. Check the volume is not already mounted
        cm_script = check_mount()
        kwargs.update({'deploy': cm_script})
        driver.deploy_to(instance, **kwargs)

        if device in cm_script.stdout:
            mount_location = _parse_mount_location(cm_script.stdout, device)
            if not mount_location:
                raise Exception("Device already mounted, "
                                "but mount location could not be determined!"
                                "Check _parse_mount_location()!")
            celery_logger.warn(
                "Device already mounted. Mount output:%s" %
                cm_script.stdout)
            # Device has already been mounted. Move along..
            return mount_location

        # Step 3. Find a suitable location to mount the volume
        celery_logger.info("Original mount location - %s" % mount_location)
        if not mount_location:
            inc = 1
            while True:
                if '/vol%s' % inc in cm_script.stdout:
                    inc += 1
                else:
                    break
            mount_location = '/vol%s' % inc

        celery_logger.info("Device location - %s" % device)
        celery_logger.info("New mount location - %s" % mount_location)

        mv_script = mount_volume(device, mount_location, username, groupname)
        kwargs.update({'deploy': mv_script})
        driver.deploy_to(instance, **kwargs)
        celery_logger.debug("mount task finished at %s." % datetime.now())
        return mount_location
    except Exception as exc:
        celery_logger.warn(exc)
        mount_task.retry(exc=exc)
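_parse_mount_location is referenced but not shown. A plausible sketch, assuming check_mount() captures the output of the `mount` command, where each line looks like '/dev/vdb on /vol1 type ext4 (rw)':

def _parse_mount_location(mount_output, device):
    # Hypothetical parser; scans `mount`-style output for the device
    # and returns the path it is mounted on, or None if absent.
    for line in mount_output.splitlines():
        parts = line.split()
        # Expected shape: <device> on <mount_point> type <fs> (<options>)
        if len(parts) >= 3 and parts[0] == device and parts[1] == 'on':
            return parts[2]
    return None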
Example #50
def monitor_allocation_sources(usernames=()):
    """
    Monitor allocation sources; if a snapshot shows that all compute has been used, enforce as necessary.
    """
    celery_logger.debug('monitor_allocation_sources - usernames: %s', usernames)
    allocation_sources = AllocationSource.objects.all()
    for allocation_source in allocation_sources.order_by('name'):
        celery_logger.debug(
            'monitor_allocation_sources - allocation_source: %s',
            allocation_source
        )
        for user in allocation_source.all_users.order_by('username'):
            celery_logger.debug('monitor_allocation_sources - user: %s', user)
            if usernames and user.username not in usernames:
                celery_logger.info(
                    "Skipping User %s - not in the list" % user.username
                )
                continue
            over_allocation = allocation_source.is_over_allocation(user)
            celery_logger.debug(
                'monitor_allocation_sources - user: %s, over_allocation: %s',
                user, over_allocation
            )

            enforcement_override_choice = AllocationSourcePluginManager.get_enforcement_override(
                user, allocation_source
            )
            celery_logger.debug(
                'monitor_allocation_sources - enforcement_override_choice: %s',
                enforcement_override_choice
            )

            if over_allocation and enforcement_override_choice == EnforcementOverrideChoice.NEVER_ENFORCE:
                celery_logger.debug(
                    'Allocation source is over allocation, but %s + user %s has an override of %s, '
                    'therefore not enforcing', allocation_source, user,
                    enforcement_override_choice
                )
                continue

            if not over_allocation and enforcement_override_choice == EnforcementOverrideChoice.ALWAYS_ENFORCE:
                celery_logger.debug(
                    'Allocation source is not over allocation, but %s + user %s has an override of %s, '
                    'therefore enforcing', allocation_source, user,
                    enforcement_override_choice
                )
                # Note: The enforcing happens in the next `if` statement.
            if over_allocation or enforcement_override_choice == EnforcementOverrideChoice.ALWAYS_ENFORCE:
                assert enforcement_override_choice in (
                    EnforcementOverrideChoice.NO_OVERRIDE,
                    EnforcementOverrideChoice.ALWAYS_ENFORCE
                )
                celery_logger.debug(
                    'monitor_allocation_sources - Going to enforce on user: %s',
                    user
                )
                allocation_source_overage_enforcement_for_user.apply_async(
                    args=(allocation_source, user)
                )