def start_task(self, task, rampart_group, dependent_tasks=None, instance=None):
    self.start_task_limit -= 1
    if self.start_task_limit == 0:
        # schedule another run immediately after this task manager
        schedule_task_manager()
    from awx.main.tasks.system import handle_work_error, handle_work_success

    dependent_tasks = dependent_tasks or []

    task_actual = {
        'type': get_type_for_model(type(task)),
        'id': task.id,
    }
    dependencies = [{'type': get_type_for_model(type(t)), 'id': t.id} for t in dependent_tasks]

    task.status = 'waiting'

    (start_status, opts) = task.pre_start()
    if not start_status:
        task.status = 'failed'
        if task.job_explanation:
            task.job_explanation += ' '
        task.job_explanation += 'Task failed pre-start check.'
        task.save()
        # TODO: run error handler to fail sub-tasks and send notifications
    else:
        if type(task) is WorkflowJob:
            task.status = 'running'
            task.send_notification_templates('running')
            logger.debug('Transitioning %s to running status.', task.log_format)
            schedule_task_manager()
        # at this point we already have control/execution nodes selected for the following cases
        else:
            task.instance_group = rampart_group
            execution_node_msg = f' and execution node {task.execution_node}' if task.execution_node else ''
            logger.debug(
                f'Submitting job {task.log_format} controlled by {task.controller_node} to instance group {rampart_group.name}{execution_node_msg}.'
            )
        with disable_activity_stream():
            task.celery_task_id = str(uuid.uuid4())
            task.save()
            task.log_lifecycle("waiting")

        if rampart_group is not None:
            self.consume_capacity(task, rampart_group.name, instance=instance)
        if task.controller_node:
            self.consume_capacity(
                task,
                settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME,
                instance=self.real_instances[task.controller_node],
                impact=settings.AWX_CONTROL_NODE_TASK_IMPACT,
            )

    def post_commit():
        if task.status != 'failed' and type(task) is not WorkflowJob:
            # Before task is dispatched, ensure that job_event partitions exist
            create_partition(task.event_class._meta.db_table, start=task.created)
            task_cls = task._get_task_class()
            task_cls.apply_async(
                [task.pk],
                opts,
                queue=task.get_queue_name(),
                uuid=task.celery_task_id,
                callbacks=[{'task': handle_work_success.name, 'kwargs': {'task_actual': task_actual}}],
                errbacks=[{'task': handle_work_error.name, 'args': [task.celery_task_id], 'kwargs': {'subtasks': [task_actual] + dependencies}}],
            )

    task.websocket_emit_status(task.status)  # adds to on_commit
    connection.on_commit(post_commit)
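# Illustrative sketch, not AWX source: the post_commit()/connection.on_commit() pattern above
# defers dispatching until the surrounding database transaction commits, so a worker can never
# pick up a job whose row is not yet visible. The same idea using Django's public
# transaction.on_commit() hook; save_and_dispatch and dispatch_to_worker are hypothetical names.
from django.db import transaction


def save_and_dispatch(job):
    with transaction.atomic():
        job.status = 'waiting'
        job.save()
        # Runs only if the atomic block commits; it is skipped entirely on rollback.
        transaction.on_commit(lambda: dispatch_to_worker(job.pk))  # dispatch_to_worker is a stand-in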
def test_disable_activity_stream():
    with disable_activity_stream():
        Organization.objects.create(name='test-organization')
    assert ActivityStream.objects.filter(organization__isnull=False).count() == 0
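# Hedged sketch, not the actual awx.main.signals implementation: one common way to build a
# context manager like disable_activity_stream() is a flag that the activity-stream signal
# receivers check before recording a change. Everything below is illustrative only.
import threading
from contextlib import contextmanager

_activity_stream_state = threading.local()


@contextmanager
def disable_activity_stream_example():
    previous = getattr(_activity_stream_state, 'disabled', False)
    _activity_stream_state.disabled = True
    try:
        yield
    finally:
        _activity_stream_state.disabled = previous


def example_activity_stream_receiver(sender, instance, **kwargs):
    # A post_save/post_delete receiver would bail out early while the flag is set.
    if getattr(_activity_stream_state, 'disabled', False):
        return
    # ... otherwise record the change as an ActivityStream entry ...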
def start_task(self, task, rampart_group, dependent_tasks=None, instance=None): from awx.main.tasks import handle_work_error, handle_work_success dependent_tasks = dependent_tasks or [] task_actual = { 'type': get_type_for_model(type(task)), 'id': task.id, } dependencies = [{ 'type': get_type_for_model(type(t)), 'id': t.id } for t in dependent_tasks] controller_node = None if task.supports_isolation() and rampart_group.controller_id: try: controller_node = rampart_group.choose_online_controller_node() except IndexError: logger.debug( six.text_type( "No controllers available in group {} to run {}"). format(rampart_group.name, task.log_format)) return task.status = 'waiting' (start_status, opts) = task.pre_start() if not start_status: task.status = 'failed' if task.job_explanation: task.job_explanation += ' ' task.job_explanation += 'Task failed pre-start check.' task.save() # TODO: run error handler to fail sub-tasks and send notifications else: if type(task) is WorkflowJob: task.status = 'running' logger.info('Transitioning %s to running status.', task.log_format) elif not task.supports_isolation() and rampart_group.controller_id: # non-Ansible jobs on isolated instances run on controller task.instance_group = rampart_group.controller task.execution_node = random.choice( list(rampart_group.controller.instances.all().values_list( 'hostname', flat=True))) logger.info( six.text_type( 'Submitting isolated {} to queue {}.').format( task.log_format, task.instance_group.name, task.execution_node)) elif controller_node: task.instance_group = rampart_group task.execution_node = instance.hostname task.controller_node = controller_node logger.info( six.text_type( 'Submitting isolated {} to queue {} controlled by {}.' ).format(task.log_format, task.execution_node, controller_node)) else: task.instance_group = rampart_group if instance is not None: task.execution_node = instance.hostname logger.info( six.text_type( 'Submitting {} to <instance group, instance> <{},{}>.' ).format(task.log_format, task.instance_group_id, task.execution_node)) with disable_activity_stream(): task.celery_task_id = str(uuid.uuid4()) task.save() if rampart_group is not None: self.consume_capacity(task, rampart_group.name) def post_commit(): task.websocket_emit_status(task.status) if task.status != 'failed' and type(task) is not WorkflowJob: task_cls = task._get_task_class() task_cls.apply_async( [task.pk], opts, queue=task.get_queue_name(), uuid=task.celery_task_id, callbacks=[{ 'task': handle_work_success.name, 'kwargs': { 'task_actual': task_actual } }], errbacks=[{ 'task': handle_work_error.name, 'args': [task.celery_task_id], 'kwargs': { 'subtasks': [task_actual] + dependencies } }], ) connection.on_commit(post_commit)
def apply_cluster_membership_policies():
    from awx.main.signals import disable_activity_stream

    started_waiting = time.time()
    with advisory_lock('cluster_policy_lock', wait=True):
        lock_time = time.time() - started_waiting
        if lock_time > 1.0:
            to_log = logger.info
        else:
            to_log = logger.debug
        to_log('Waited {} seconds to obtain lock name: cluster_policy_lock'.format(lock_time))
        started_compute = time.time()
        # Hop nodes should never get assigned to an InstanceGroup.
        all_instances = list(Instance.objects.exclude(node_type='hop').order_by('id'))
        all_groups = list(InstanceGroup.objects.prefetch_related('instances'))

        total_instances = len(all_instances)
        actual_groups = []
        actual_instances = []
        Group = namedtuple('Group', ['obj', 'instances', 'prior_instances'])
        Node = namedtuple('Instance', ['obj', 'groups'])

        # Process policy instance list first, these will represent manually managed memberships
        instance_hostnames_map = {inst.hostname: inst for inst in all_instances}
        for ig in all_groups:
            group_actual = Group(obj=ig, instances=[], prior_instances=[instance.pk for instance in ig.instances.all()])  # obtained in prefetch
            for hostname in ig.policy_instance_list:
                if hostname not in instance_hostnames_map:
                    logger.info("Unknown instance {} in {} policy list".format(hostname, ig.name))
                    continue
                inst = instance_hostnames_map[hostname]
                group_actual.instances.append(inst.id)
                # NOTE: arguable behavior: policy-list-group is not added to
                # instance's group count for consideration in minimum-policy rules

            if group_actual.instances:
                logger.debug("Policy List, adding Instances {} to Group {}".format(group_actual.instances, ig.name))

            actual_groups.append(group_actual)

        # Process Instance minimum policies next, since it represents a concrete lower bound to the
        # number of instances to make available to instance groups
        actual_instances = [Node(obj=i, groups=[]) for i in all_instances if i.managed_by_policy]
        logger.debug("Total instances: {}, available for policy: {}".format(total_instances, len(actual_instances)))
        for g in sorted(actual_groups, key=lambda x: len(x.instances)):
            exclude_type = 'execution' if g.obj.name == settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME else 'control'
            policy_min_added = []
            for i in sorted(actual_instances, key=lambda x: len(x.groups)):
                if i.obj.node_type == exclude_type:
                    continue  # never place execution instances in controlplane group or control instances in other groups
                if len(g.instances) >= g.obj.policy_instance_minimum:
                    break
                if i.obj.id in g.instances:
                    # If the instance is already _in_ the group, it was
                    # applied earlier via the policy list
                    continue
                g.instances.append(i.obj.id)
                i.groups.append(g.obj.id)
                policy_min_added.append(i.obj.id)
            if policy_min_added:
                logger.debug("Policy minimum, adding Instances {} to Group {}".format(policy_min_added, g.obj.name))

        # Finally, process instance policy percentages
        for g in sorted(actual_groups, key=lambda x: len(x.instances)):
            exclude_type = 'execution' if g.obj.name == settings.DEFAULT_CONTROL_PLANE_QUEUE_NAME else 'control'
            candidate_pool_ct = sum(1 for i in actual_instances if i.obj.node_type != exclude_type)
            if not candidate_pool_ct:
                continue
            policy_per_added = []
            for i in sorted(actual_instances, key=lambda x: len(x.groups)):
                if i.obj.node_type == exclude_type:
                    continue
                if i.obj.id in g.instances:
                    # If the instance is already _in_ the group, it was
                    # applied earlier via a minimum policy or policy list
                    continue
                if 100 * float(len(g.instances)) / candidate_pool_ct >= g.obj.policy_instance_percentage:
                    break
                g.instances.append(i.obj.id)
                i.groups.append(g.obj.id)
                policy_per_added.append(i.obj.id)
            if policy_per_added:
                logger.debug("Policy percentage, adding Instances {} to Group {}".format(policy_per_added, g.obj.name))

        # Determine if any changes need to be made
        needs_change = False
        for g in actual_groups:
            if set(g.instances) != set(g.prior_instances):
                needs_change = True
                break
        if not needs_change:
            logger.debug('Cluster policy no-op finished in {} seconds'.format(time.time() - started_compute))
            return

        # On a differential basis, apply instances to groups
        with transaction.atomic():
            with disable_activity_stream():
                for g in actual_groups:
                    if g.obj.is_container_group:
                        logger.debug('Skipping containerized group {} for policy calculation'.format(g.obj.name))
                        continue
                    instances_to_add = set(g.instances) - set(g.prior_instances)
                    instances_to_remove = set(g.prior_instances) - set(g.instances)
                    if instances_to_add:
                        logger.debug('Adding instances {} to group {}'.format(list(instances_to_add), g.obj.name))
                        g.obj.instances.add(*instances_to_add)
                    if instances_to_remove:
                        logger.debug('Removing instances {} from group {}'.format(list(instances_to_remove), g.obj.name))
                        g.obj.instances.remove(*instances_to_remove)

        logger.debug('Cluster policy computation finished in {} seconds'.format(time.time() - started_compute))
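# Hedged sketch of the locking pattern above, assuming the django-pglocks style advisory_lock
# used throughout these functions: a PostgreSQL advisory lock serializes the work across every
# node in the cluster. With wait=True the block queues behind the current holder; with
# wait=False it yields a boolean so the caller can skip the run instead of blocking. The lock
# name below is an example only.
from django_pglocks import advisory_lock


def run_exclusively():
    with advisory_lock('example_exclusive_lock', wait=True):
        pass  # only one process cluster-wide executes this block at a time


def run_if_free():
    with advisory_lock('example_exclusive_lock', wait=False) as acquired:
        if not acquired:
            return  # another process holds the lock; skip this run
        pass  # do the work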
def make_the_data(): with disable_activity_stream(): with batch_role_ancestor_rebuilding(), disable_computed_fields(): admin, created = User.objects.get_or_create(username='******', is_superuser=True) if created: admin.is_superuser = True admin.save() admin.set_password('test') admin.save() org_admin, created = User.objects.get_or_create( username='******') if created: org_admin.set_password('test') org_admin.save() org_member, created = User.objects.get_or_create( username='******') if created: org_member.set_password('test') org_member.save() prj_admin, created = User.objects.get_or_create( username='******') if created: prj_admin.set_password('test') prj_admin.save() jt_admin, created = User.objects.get_or_create(username='******') if created: jt_admin.set_password('test') jt_admin.save() inv_admin, created = User.objects.get_or_create( username='******') if created: inv_admin.set_password('test') inv_admin.save() print('# Creating %d organizations' % n_organizations) for i in xrange(n_organizations): sys.stdout.write('\r%d ' % (i + 1)) sys.stdout.flush() org, _ = Organization.objects.get_or_create( name='%s Organization %d' % (prefix, i)) organizations.append(org) if i == 0: org.admin_role.members.add(org_admin) org.member_role.members.add(org_admin) org.member_role.members.add(org_member) org.member_role.members.add(prj_admin) org.member_role.members.add(jt_admin) org.member_role.members.add(inv_admin) print('') print('# Creating %d users' % n_users) org_idx = 0 for n in spread(n_users, n_organizations): for i in range(n): ids['user'] += 1 user_id = ids['user'] sys.stdout.write('\r Assigning %d to %s: %d ' % (n, organizations[org_idx].name, i + 1)) sys.stdout.flush() user, _ = User.objects.get_or_create(username='******' % (prefix, user_id)) organizations[org_idx].member_role.members.add(user) users.append(user) org_idx += 1 print('') creator_gen = yield_choice(users) for i in range(6): next(creator_gen) modifier_gen = yield_choice(users) print('# Creating %d teams' % n_teams) org_idx = 0 for n in spread(n_teams, n_organizations): org = organizations[org_idx] for i in range(n): ids['team'] += 1 team_id = ids['team'] sys.stdout.write('\r Assigning %d to %s: %d ' % (n, org.name, i + 1)) sys.stdout.flush() team, _ = Team.objects.get_or_create( name='%s Team %d Org %d' % (prefix, team_id, org_idx), organization=org, defaults=dict(created_by=next(creator_gen), modified_by=next(modifier_gen))) teams.append(team) org_idx += 1 print('') print('# Adding users to teams') for org in organizations: org_teams = [t for t in org.teams.all()] org_users = [u for u in org.member_role.members.all()] print(' Spreading %d users accross %d teams for %s' % (len(org_users), len(org_teams), org.name)) # Our normal spread for most users cur_user_idx = 0 cur_team_idx = 0 for n in spread(len(org_users), len(org_teams)): team = org_teams[cur_team_idx] for i in range(n): if cur_user_idx < len(org_users): user = org_users[cur_user_idx] team.member_role.members.add(user) cur_user_idx += 1 cur_team_idx += 1 # First user gets added to all teams for team in org_teams: team.member_role.members.add(org_users[0]) print('# Creating %d credentials for users' % (n_credentials - n_credentials // 2)) user_idx = 0 for n in spread(n_credentials - n_credentials // 2, n_users): user = users[user_idx] for i in range(n): ids['credential'] += 1 sys.stdout.write('\r %d ' % (ids['credential'])) sys.stdout.flush() credential_id = ids['credential'] credential, _ = Credential.objects.get_or_create( name='%s Credential %d User %d' % 
(prefix, credential_id, user_idx), defaults=dict(created_by=next(creator_gen), modified_by=next(modifier_gen)), credential_type=CredentialType.from_v1_kind('ssh')) credential.admin_role.members.add(user) credentials.append(credential) user_idx += 1 print('') credential_gen = yield_choice(credentials) print('# Creating %d credentials for teams' % (n_credentials // 2)) team_idx = 0 starting_credential_id = ids['credential'] for n in spread(n_credentials - n_credentials // 2, n_teams): team = teams[team_idx] for i in range(n): ids['credential'] += 1 sys.stdout.write( '\r %d ' % (ids['credential'] - starting_credential_id)) sys.stdout.flush() credential_id = ids['credential'] credential, _ = Credential.objects.get_or_create( name='%s Credential %d team %d' % (prefix, credential_id, team_idx), defaults=dict(created_by=next(creator_gen), modified_by=next(modifier_gen)), credential_type=CredentialType.from_v1_kind('ssh')) credential.admin_role.parents.add(team.member_role) credentials.append(credential) team_idx += 1 print('') print('# Creating %d projects' % n_projects) org_idx = 0 for n in spread(n_projects, n_organizations): org = organizations[org_idx] for i in range(n): ids['project'] += 1 project_id = ids['project'] sys.stdout.write('\r Assigning %d to %s: %d ' % (n, org.name, i + 1)) sys.stdout.flush() project, _ = Project.objects.get_or_create( name='%s Project %d Org %d' % (prefix, project_id, org_idx), organization=org, defaults=dict( created_by=next(creator_gen), modified_by=next(modifier_gen), scm_url= 'https://github.com/jlaska/ansible-playbooks.git', scm_type='git', playbook_files=[ "check.yml", "debug-50.yml", "debug.yml", "debug2.yml", "debug_extra_vars.yml", "dynamic_inventory.yml", "environ_test.yml", "fail_unless.yml", "pass_unless.yml", "pause.yml", "ping-20.yml", "ping.yml", "setfact_50.yml", "vault.yml" ])) projects.append(project) if org_idx == 0 and i == 0: project.admin_role.members.add(prj_admin) org_idx += 1 print('') print('# Creating %d inventories' % n_inventories) org_idx = 0 for n in spread(n_inventories, min(n_inventories // 4 + 1, n_organizations)): org = organizations[org_idx] for i in range(n): ids['inventory'] += 1 inventory_id = ids['inventory'] sys.stdout.write('\r Assigning %d to %s: %d ' % (n, org.name, i + 1)) sys.stdout.flush() inventory, _ = Inventory.objects.get_or_create( name='%s Inventory %d Org %d' % (prefix, inventory_id, org_idx), organization=org, defaults=dict(created_by=next(creator_gen), modified_by=next(modifier_gen)), variables='{"ansible_connection": "local"}') inventories.append(inventory) if org_idx == 0 and i == 0: inventory.admin_role.members.add(inv_admin) org_idx += 1 print('') print('# Creating %d inventory_groups' % n_inventory_groups) inv_idx = 0 for n in spread(n_inventory_groups, n_inventories): inventory = inventories[inv_idx] parent_list = [None] * 3 for i in range(n): ids['group'] += 1 group_id = ids['group'] sys.stdout.write('\r Assigning %d to %s: %d ' % (n, inventory.name, i + 1)) sys.stdout.flush() group, _ = Group.objects.get_or_create( name='%s Group %d Inventory %d' % (prefix, group_id, inv_idx), inventory=inventory, defaults=dict(created_by=next(creator_gen), modified_by=next(modifier_gen))) # Have each group have up to 3 parent groups for parent_n in range(3): if i // 4 + parent_n < len( parent_list) and parent_list[i // 4 + parent_n]: group.parents.add(parent_list[i // 4 + parent_n]) if parent_list[i // 4] is None: parent_list[i // 4] = group else: parent_list.append(group) inventory_groups.append(group) inv_idx += 
1 print('') print('# Creating %d inventory_hosts' % n_inventory_hosts) group_idx = 0 for n in spread(n_inventory_hosts, n_inventory_groups): group = inventory_groups[group_idx] for i in range(n): ids['host'] += 1 host_id = ids['host'] sys.stdout.write('\r Assigning %d to %s: %d ' % (n, group.name, i + 1)) sys.stdout.flush() host, _ = Host.objects.get_or_create( name='%s.host-%06d.group-%05d.dummy' % (prefix, host_id, group_idx), inventory=group.inventory, defaults=dict(created_by=next(creator_gen), modified_by=next(modifier_gen))) # Add the host to up to 3 groups host.groups.add(group) for m in range(2): if group_idx + m < len( inventory_groups ) and group.inventory.id == inventory_groups[ group_idx + m].inventory.id: host.groups.add(inventory_groups[group_idx + m]) inventory_hosts.append(host) group_idx += 1 print('') print('# Creating %d job_templates' % n_job_templates) project_idx = 0 inv_idx = 0 for n in spread(n_job_templates, n_projects): project = projects[project_idx] for i in range(n): ids['job_template'] += 1 job_template_id = ids['job_template'] sys.stdout.write('\r Assigning %d to %s: %d ' % (n, project.name, i + 1)) sys.stdout.flush() inventory = None org_inv_count = project.organization.inventories.count() if org_inv_count > 0: inventory = project.organization.inventories.all()[ inv_idx % org_inv_count] extra_kwargs = {} job_template, _ = JobTemplate.objects.get_or_create( name='%s Job Template %d Project %d' % (prefix, job_template_id, project_idx), defaults=dict(inventory=inventory, project=project, created_by=next(creator_gen), modified_by=next(modifier_gen), playbook="debug.yml", **extra_kwargs)) job_template.credentials.add(next(credential_gen)) if ids['job_template'] % 7 == 0: job_template.credentials.add(next(credential_gen)) if ids['job_template'] % 5 == 0: # formerly cloud credential job_template.credentials.add(next(credential_gen)) job_template._is_new = _ job_templates.append(job_template) inv_idx += 1 if project_idx == 0 and i == 0: job_template.admin_role.members.add(jt_admin) project_idx += 1 if n > 0: print('') print('# Creating %d Workflow Job Templates' % n_wfjts) org_idx = 0 for n in spread(n_wfjts, n_organizations): org = organizations[org_idx] for i in range(n): ids['wfjts'] += 1 wfjt_id = ids['wfjts'] sys.stdout.write('\r Assigning %d to %s: %d ' % (n, org.name, i + 1)) sys.stdout.flush() wfjt, _ = WorkflowJobTemplate.objects.get_or_create( name='%s WFJT %d Org %d' % (prefix, wfjt_id, org_idx), description=bulk_data_description, organization=org, defaults=dict(created_by=next(creator_gen), modified_by=next(modifier_gen))) wfjt._is_new = _ wfjts.append(wfjt) org_idx += 1 if n: print('') print('# Creating %d Workflow Job Template nodes' % n_nodes) wfjt_idx = 0 for n in spread(n_nodes, n_wfjts): wfjt = wfjts[wfjt_idx] if not wfjt._is_new: continue jt_gen = yield_choice(job_templates) inv_gen = yield_choice(inventories) cred_gen = yield_choice(credentials) parent_idx = 0 wfjt_nodes = [] for i in range(n): ids['nodes'] += 1 sys.stdout.write('\r Assigning %d to %s: %d ' % (n, wfjt.name, i + 1)) sys.stdout.flush() kwargs = dict(workflow_job_template=wfjt, unified_job_template=next(jt_gen), modified=now()) if i % 2 == 0: # only apply inventories for every other node kwargs['inventory'] = next(inv_gen) if i % 3 == 0: # only apply prompted credential every 3rd node kwargs['credential'] = next(cred_gen) node, _ = WorkflowJobTemplateNode.objects.get_or_create( **kwargs) # nodes.append(node) wfjt_nodes.append(node) if i <= 3: continue parent_node = 
wfjt_nodes[parent_idx] if parent_node.workflow_job_template != node.workflow_job_template: raise Exception( "Programming error, associating nodes in different workflows" ) elif parent_node == node: raise Exception("error, self association") if parent_idx % 2 == 0: parent_node.always_nodes.add(node) else: if (i + 1) % 3 == 0: parent_node.failure_nodes.add(node) else: parent_node.success_nodes.add(node) parent_idx = (parent_idx + 7) % len(wfjt_nodes) wfjt_idx += 1 if n: print('') print('# Creating %d Labels' % n_labels) org_idx = 0 for n in spread(n_labels, n_organizations): org = organizations[org_idx] for i in range(n): ids['labels'] += 1 label_id = ids['labels'] sys.stdout.write('\r Assigning %d to %s: %d ' % (n, org.name, i + 1)) sys.stdout.flush() label, _ = Label.objects.get_or_create( name='%sL_%do%d' % (prefix, label_id, org_idx), organization=org, defaults=dict(created_by=next(creator_gen), modified_by=next(modifier_gen))) labels.append(label) org_idx += 1 if n: print('') label_gen = yield_choice(labels) print('# Adding labels to job templates') jt_idx = 0 for n in spread(n_labels * 7, n_job_templates): if n == 0: continue jt = job_templates[jt_idx] if not jt._is_new: continue print(' Giving %d labels to %s JT' % (n, jt.name)) for i in range(n): jt.labels.add(next(label_gen)) jt_idx += 1 print('# Adding labels to workflow job templates') wfjt_idx = 0 for n in spread(n_labels * 3, n_wfjts): wfjt = wfjts[wfjt_idx] if not jt._is_new: continue print(' Giving %d labels to %s WFJT' % (n, wfjt.name)) for i in range(n): wfjt.labels.add(next(label_gen)) wfjt_idx += 1 # Disable logging here, because it will mess up output format logger = logging.getLogger('awx.main') logger.propagate = False print('# Creating %d jobs' % n_jobs) group_idx = 0 job_template_idx = 0 job_i = 0 for n in spread(n_jobs, n_job_templates): job_template = job_templates[job_template_idx] for i in range(n): sys.stdout.write('\r Assigning %d to %s: %d ' % (n, job_template.name, i + 1)) sys.stdout.flush() if len(jobs) % 4 == 0: job_stat = 'failed' elif len(jobs) % 11 == 0: job_stat = 'canceled' else: job_stat = 'successful' job, _ = Job.objects.get_or_create( job_template=job_template, status=job_stat, name="%s-%d" % (job_template.name, job_i), project=job_template.project, inventory=job_template.inventory, ) for ec in job_template.credentials.all(): job.credentials.add(ec) job._is_new = _ jobs.append(job) job_i += 1 if not job._is_new: group_idx += 1 continue if i + 1 == n: job_template.last_job = job if job_template.pk % 5 == 0: job_template.current_job = job job_template.save() if job._is_new: with transaction.atomic(): if job_template.inventory: inv_groups = [ g for g in job_template.inventory.groups.all() ] if len(inv_groups): JobHostSummary.objects.bulk_create([ JobHostSummary(job=job, host=h, host_name=h.name, processed=1, created=now(), modified=now()) for h in inv_groups[ group_idx % len(inv_groups)].hosts.all()[:100] ]) group_idx += 1 job_template_idx += 1 if n: print('') print('# Creating %d job events' % n_job_events) job_idx = 0 for n in spread(n_job_events, n_jobs): job = jobs[job_idx] # Check if job already has events, for idempotence if not job._is_new: continue # Bulk create in chunks with maximum chunk size MAX_BULK_CREATE = 100 for j in range((n / MAX_BULK_CREATE) + 1): n_subgroup = MAX_BULK_CREATE if j == n / MAX_BULK_CREATE: # on final pass, create the remainder n_subgroup = n % MAX_BULK_CREATE sys.stdout.write( '\r Creating %d job events for job %d, subgroup: %d' % (n, job.id, j + 1)) 
sys.stdout.flush() JobEvent.objects.bulk_create([ JobEvent(created=now(), modified=now(), job=job, event='runner_on_ok') for i in range(n_subgroup) ]) job_idx += 1 if n: print('')
def copy_model_obj(old_parent, new_parent, model, obj, creater, copy_name='', create_kwargs=None):
    fields_to_preserve = set(getattr(model, 'FIELDS_TO_PRESERVE_AT_COPY', []))
    fields_to_discard = set(getattr(model, 'FIELDS_TO_DISCARD_AT_COPY', []))
    m2m_to_preserve = {}
    o2m_to_preserve = {}
    create_kwargs = create_kwargs or {}
    for field_name in fields_to_discard:
        create_kwargs.pop(field_name, None)
    for field in model._meta.get_fields():
        try:
            field_val = getattr(obj, field.name)
        except AttributeError:
            continue
        # Adjust copy blocked fields here.
        if (
            field.name in fields_to_discard
            or field.name in ['id', 'pk', 'polymorphic_ctype', 'unifiedjobtemplate_ptr', 'created_by', 'modified_by']
            or field.name.endswith('_role')
        ):
            create_kwargs.pop(field.name, None)
            continue
        if field.one_to_many:
            if field.name in fields_to_preserve:
                o2m_to_preserve[field.name] = field_val
        elif field.many_to_many:
            if field.name in fields_to_preserve and not old_parent:
                m2m_to_preserve[field.name] = field_val
        elif field.many_to_one and not field_val:
            create_kwargs.pop(field.name, None)
        elif field.many_to_one and field_val == old_parent:
            create_kwargs[field.name] = new_parent
        elif field.name == 'name' and not old_parent:
            create_kwargs[field.name] = copy_name or field_val + ' copy'
        elif field.name in fields_to_preserve:
            create_kwargs[field.name] = CopyAPIView._decrypt_model_field_if_needed(obj, field.name, field_val)

    # WorkflowJobTemplateNodes that represent an approval are *special*;
    # when we copy them, we actually want to *copy* the UJT they point at
    # rather than share the template reference between nodes in disparate
    # workflows
    if isinstance(obj, WorkflowJobTemplateNode) and isinstance(getattr(obj, 'unified_job_template'), WorkflowApprovalTemplate):
        new_approval_template, sub_objs = CopyAPIView.copy_model_obj(None, None, WorkflowApprovalTemplate, obj.unified_job_template, creater)
        create_kwargs['unified_job_template'] = new_approval_template

    new_obj = model.objects.create(**create_kwargs)
    logger.debug('Deep copy: Created new object {}({})'.format(new_obj, model))
    # Need to save separately because django-crum get_current_user would
    # not work properly in a non-request-response-cycle context.
    new_obj.created_by = creater
    new_obj.save()
    from awx.main.signals import disable_activity_stream

    with disable_activity_stream():
        for m2m in m2m_to_preserve:
            for related_obj in m2m_to_preserve[m2m].all():
                getattr(new_obj, m2m).add(related_obj)
    if not old_parent:
        sub_objects = []
        for o2m in o2m_to_preserve:
            for sub_obj in o2m_to_preserve[o2m].all():
                sub_model = type(sub_obj)
                sub_objects.append((sub_model.__module__, sub_model.__name__, sub_obj.pk))
        return new_obj, sub_objects
    ret = {obj: new_obj}
    for o2m in o2m_to_preserve:
        for sub_obj in o2m_to_preserve[o2m].all():
            ret.update(CopyAPIView.copy_model_obj(obj, new_obj, type(sub_obj), sub_obj, creater))
    return ret
def delete_jobs(collector):
    with disable_activity_stream(), disable_computed_fields():
        qs = Job.objects.all()
        collector.collect(qs)
        collector.delete()
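# Hedged usage sketch for delete_jobs(): the collector argument is a Django deletion Collector,
# which batches the cascade delete in bulk. Collector is internal Django API, so treat this as
# illustrative rather than a supported recipe.
from django.db.models.deletion import Collector

collector = Collector(using='default')
delete_jobs(collector)  # deletes all Job rows with the activity stream and computed fields disabled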
def gather(dest=None, module=None, subset=None, since=None, until=None, collection_type='scheduled'):
    """
    Gather all defined metrics and write them as JSON files in a .tgz

    :param dest:   the (optional) absolute path to write a compressed tarball
    :param module: the module to search for registered analytic collector functions;
                   defaults to awx.main.analytics.collectors
    """
    log_level = logging.ERROR if collection_type != 'scheduled' else logging.DEBUG

    if not _valid_license():
        logger.log(log_level, "Invalid License provided, or No License Provided")
        return None

    if collection_type != 'dry-run':
        if not settings.INSIGHTS_TRACKING_STATE:
            logger.log(log_level, "Insights for Ansible Automation Platform not enabled. Use --dry-run to gather locally without sending.")
            return None

        if not (settings.AUTOMATION_ANALYTICS_URL and settings.REDHAT_USERNAME and settings.REDHAT_PASSWORD):
            logger.log(log_level, "Not gathering analytics, configuration is invalid. Use --dry-run to gather locally without sending.")
            return None

    with advisory_lock('gather_analytics_lock', wait=False) as acquired:
        if not acquired:
            logger.log(log_level, "Not gathering analytics, another task holds lock")
            return None

        from awx.conf.models import Setting
        from awx.main.analytics import collectors
        from awx.main.signals import disable_activity_stream

        logger.debug("Last analytics run was: {}".format(settings.AUTOMATION_ANALYTICS_LAST_GATHER))

        try:
            since, until, last_gather = calculate_collection_interval(since, until)
        except ValueError:
            return None

        last_entries = Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_ENTRIES').first()
        last_entries = json.loads((last_entries.value if last_entries is not None else '') or '{}', object_hook=datetime_hook)

        collector_module = module if module else collectors
        collector_list = [
            func
            for name, func in inspect.getmembers(collector_module)
            if inspect.isfunction(func) and hasattr(func, '__awx_analytics_key__') and (not subset or name in subset)
        ]
        if not any(c.__awx_analytics_key__ == 'config' for c in collector_list):
            # In order to ship to analytics, we must include the output of the built-in 'config' collector.
            collector_list.append(collectors.config)

        json_collectors = [func for func in collector_list if func.__awx_analytics_type__ == 'json']
        csv_collectors = [func for func in collector_list if func.__awx_analytics_type__ == 'csv']

        dest = pathlib.Path(dest or tempfile.mkdtemp(prefix='awx_analytics'))
        gather_dir = dest.joinpath('stage')
        gather_dir.mkdir(mode=0o700)
        tarfiles = []
        succeeded = True

        # These json collectors are pretty compact, so collect all of them before shipping to analytics.
        data = {}
        for func in json_collectors:
            key = func.__awx_analytics_key__
            filename = f'{key}.json'
            try:
                last_entry = max(last_entries.get(key) or last_gather, until - timedelta(weeks=4))
                results = (func(since or last_entry, collection_type=collection_type, until=until), func.__awx_analytics_version__)
                json.dumps(results, cls=DjangoJSONEncoder)  # throwaway check to see if the data is json-serializable
                data[filename] = results
            except Exception:
                logger.exception("Could not generate metric {}".format(filename))
        if data:
            if data.get('config.json') is None:
                logger.error("'config' collector data is missing.")
                return None

            tgzfile = package(dest.parent, data, until)
            if tgzfile is not None:
                tarfiles.append(tgzfile)
                if collection_type != 'dry-run':
                    if ship(tgzfile):
                        with disable_activity_stream():
                            for filename in data:
                                key = filename.replace('.json', '')
                                last_entries[key] = max(last_entries[key], until) if last_entries.get(key) else until
                            settings.AUTOMATION_ANALYTICS_LAST_ENTRIES = json.dumps(last_entries, cls=DjangoJSONEncoder)
                    else:
                        succeeded = False

        for func in csv_collectors:
            key = func.__awx_analytics_key__
            filename = f'{key}.csv'
            try:
                # These slicer functions may return a generator. The `since` parameter is
                # allowed to be None, and will fall back to LAST_ENTRIES[key] or to
                # LAST_GATHER (truncated appropriately to match the 4-week limit).
                if func.__awx_expensive__:
                    slices = func.__awx_expensive__(key, since, until, last_gather)
                else:
                    slices = collectors.trivial_slicing(key, since, until, last_gather)

                for start, end in slices:
                    files = func(start, full_path=gather_dir, until=end)

                    if not files:
                        if collection_type != 'dry-run':
                            with disable_activity_stream():
                                entry = last_entries.get(key)
                                last_entries[key] = max(entry, end) if entry and type(entry) == type(end) else end
                                settings.AUTOMATION_ANALYTICS_LAST_ENTRIES = json.dumps(last_entries, cls=DjangoJSONEncoder)
                        continue

                    slice_succeeded = True
                    for fpath in files:
                        payload = {filename: (fpath, func.__awx_analytics_version__)}

                        payload['config.json'] = data.get('config.json')
                        if payload['config.json'] is None:
                            logger.error("'config' collector data is missing, and is required to ship.")
                            return None

                        tgzfile = package(dest.parent, payload, until)
                        if tgzfile is not None:
                            tarfiles.append(tgzfile)
                            if collection_type != 'dry-run':
                                if not ship(tgzfile):
                                    slice_succeeded, succeeded = False, False
                                    break

                    if slice_succeeded and collection_type != 'dry-run':
                        with disable_activity_stream():
                            entry = last_entries.get(key)
                            last_entries[key] = max(entry, end) if entry and type(entry) == type(end) else end
                            settings.AUTOMATION_ANALYTICS_LAST_ENTRIES = json.dumps(last_entries, cls=DjangoJSONEncoder)
            except Exception:
                succeeded = False
                logger.exception("Could not generate metric {}".format(filename))

        if collection_type != 'dry-run':
            if succeeded:
                for fpath in tarfiles:
                    if os.path.exists(fpath):
                        os.remove(fpath)
            with disable_activity_stream():
                if not settings.AUTOMATION_ANALYTICS_LAST_GATHER or until > settings.AUTOMATION_ANALYTICS_LAST_GATHER:
                    # `AUTOMATION_ANALYTICS_LAST_GATHER` is set whether collection succeeds or fails;
                    # if collection fails because of a persistent, underlying issue and we do not set last_gather,
                    # we risk the collectors hitting an increasingly greater workload while the underlying issue
                    # remains unresolved. Put simply, if collection fails, we just move on.

                    # All that said, `AUTOMATION_ANALYTICS_LAST_GATHER` plays a much smaller role in determining
                    # what is actually collected than it used to; collectors now mostly rely on their respective entry
                    # under `last_entries` to determine what should be collected.
                    settings.AUTOMATION_ANALYTICS_LAST_GATHER = until

        shutil.rmtree(dest, ignore_errors=True)  # clean up individual artifact files

        if not tarfiles:
            # No data was collected
            logger.warning("No data from {} to {}".format(since or last_gather, until))
            return None

        return tarfiles
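# Hedged sketch of the last_entries round-trip used in gather(): timestamps are serialized with
# DjangoJSONEncoder (datetimes become ISO 8601 strings) and revived on load with an object_hook.
# datetime_hook_example is a hypothetical stand-in for the AWX helper of the same purpose; the
# real implementation may differ.
import json
from datetime import datetime
from django.core.serializers.json import DjangoJSONEncoder


def datetime_hook_example(d):
    # Turn ISO-formatted string values back into datetime objects; leave everything else alone.
    revived = {}
    for key, value in d.items():
        try:
            revived[key] = datetime.fromisoformat(value)
        except (TypeError, ValueError):
            revived[key] = value
    return revived


encoded = json.dumps({'config': datetime(2023, 1, 1, 12, 0)}, cls=DjangoJSONEncoder)
decoded = json.loads(encoded, object_hook=datetime_hook_example)  # {'config': datetime(2023, 1, 1, 12, 0)}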
def start_task(self, task, rampart_group, dependent_tasks=None, instance=None): self.start_task_limit -= 1 if self.start_task_limit == 0: # schedule another run immediately after this task manager schedule_task_manager() from awx.main.tasks import handle_work_error, handle_work_success dependent_tasks = dependent_tasks or [] task_actual = { 'type': get_type_for_model(type(task)), 'id': task.id, } dependencies = [{ 'type': get_type_for_model(type(t)), 'id': t.id } for t in dependent_tasks] task.status = 'waiting' (start_status, opts) = task.pre_start() if not start_status: task.status = 'failed' if task.job_explanation: task.job_explanation += ' ' task.job_explanation += 'Task failed pre-start check.' task.save() # TODO: run error handler to fail sub-tasks and send notifications else: if type(task) is WorkflowJob: task.status = 'running' task.send_notification_templates('running') logger.debug('Transitioning %s to running status.', task.log_format) schedule_task_manager() elif rampart_group.is_container_group: task.instance_group = rampart_group if task.capacity_type == 'execution': # find one real, non-containerized instance with capacity to # act as the controller for k8s API interaction try: task.controller_node = Instance.choose_online_control_plane_node( ) task.log_lifecycle("controller_node_chosen") except IndexError: logger.warning( "No control plane nodes available to run containerized job {}" .format(task.log_format)) return else: # project updates and system jobs don't *actually* run in pods, so # just pick *any* non-containerized host and use it as the execution node task.execution_node = Instance.choose_online_control_plane_node( ) task.log_lifecycle("execution_node_chosen") logger.debug( 'Submitting containerized {} to queue {}.'.format( task.log_format, task.execution_node)) else: task.instance_group = rampart_group task.execution_node = instance.hostname task.log_lifecycle("execution_node_chosen") if instance.node_type == 'execution': try: task.controller_node = Instance.choose_online_control_plane_node( ) task.log_lifecycle("controller_node_chosen") except IndexError: logger.warning( "No control plane nodes available to manage {}". format(task.log_format)) return else: # control plane nodes will manage jobs locally for performance and resilience task.controller_node = task.execution_node task.log_lifecycle("controller_node_chosen") logger.debug( 'Submitting job {} to queue {} controlled by {}.'.format( task.log_format, task.execution_node, task.controller_node)) with disable_activity_stream(): task.celery_task_id = str(uuid.uuid4()) task.save() task.log_lifecycle("waiting") if rampart_group is not None: self.consume_capacity(task, rampart_group.name, instance=instance) def post_commit(): if task.status != 'failed' and type(task) is not WorkflowJob: # Before task is dispatched, ensure that job_event partitions exist create_partition(task.event_class._meta.db_table, start=task.created) task_cls = task._get_task_class() task_cls.apply_async( [task.pk], opts, queue=task.get_queue_name(), uuid=task.celery_task_id, callbacks=[{ 'task': handle_work_success.name, 'kwargs': { 'task_actual': task_actual } }], errbacks=[{ 'task': handle_work_error.name, 'args': [task.celery_task_id], 'kwargs': { 'subtasks': [task_actual] + dependencies } }], ) task.websocket_emit_status(task.status) # adds to on_commit connection.on_commit(post_commit)
def add_dependencies(self, task, dependencies):
    with disable_activity_stream():
        task.dependent_jobs.add(*dependencies)
def start_task(self, task, rampart_group, dependent_tasks=None, instance=None): from awx.main.tasks import handle_work_error, handle_work_success dependent_tasks = dependent_tasks or [] task_actual = { 'type': get_type_for_model(type(task)), 'id': task.id, } dependencies = [{'type': get_type_for_model(type(t)), 'id': t.id} for t in dependent_tasks] controller_node = None if task.supports_isolation() and rampart_group.controller_id: try: controller_node = rampart_group.choose_online_controller_node() except IndexError: logger.debug("No controllers available in group {} to run {}".format( rampart_group.name, task.log_format)) return task.status = 'waiting' (start_status, opts) = task.pre_start() if not start_status: task.status = 'failed' if task.job_explanation: task.job_explanation += ' ' task.job_explanation += 'Task failed pre-start check.' task.save() # TODO: run error handler to fail sub-tasks and send notifications else: if type(task) is WorkflowJob: task.status = 'running' task.send_notification_templates('running') logger.debug('Transitioning %s to running status.', task.log_format) schedule_task_manager() elif not task.supports_isolation() and rampart_group.controller_id: # non-Ansible jobs on isolated instances run on controller task.instance_group = rampart_group.controller task.execution_node = random.choice(list(rampart_group.controller.instances.all().values_list('hostname', flat=True))) logger.debug('Submitting isolated {} to queue {}.'.format( task.log_format, task.instance_group.name, task.execution_node)) elif controller_node: task.instance_group = rampart_group task.execution_node = instance.hostname task.controller_node = controller_node logger.debug('Submitting isolated {} to queue {} controlled by {}.'.format( task.log_format, task.execution_node, controller_node)) elif rampart_group.is_containerized: # find one real, non-containerized instance with capacity to # act as the controller for k8s API interaction match = None for group in InstanceGroup.objects.all(): if group.is_containerized or group.controller_id: continue match = group.fit_task_to_most_remaining_capacity_instance(task) if match: break task.instance_group = rampart_group if match is None: logger.warn( 'No available capacity to run containerized <{}>.'.format(task.log_format) ) else: if task.supports_isolation(): task.controller_node = match.hostname else: # project updates and inventory updates don't *actually* run in pods, # so just pick *any* non-isolated, non-containerized host and use it # as the execution node task.execution_node = match.hostname logger.debug('Submitting containerized {} to queue {}.'.format( task.log_format, task.execution_node)) else: task.instance_group = rampart_group if instance is not None: task.execution_node = instance.hostname logger.debug('Submitting {} to <instance group, instance> <{},{}>.'.format( task.log_format, task.instance_group_id, task.execution_node)) with disable_activity_stream(): task.celery_task_id = str(uuid.uuid4()) task.save() if rampart_group is not None: self.consume_capacity(task, rampart_group.name) def post_commit(): if task.status != 'failed' and type(task) is not WorkflowJob: task_cls = task._get_task_class() task_cls.apply_async( [task.pk], opts, queue=task.get_queue_name(), uuid=task.celery_task_id, callbacks=[{ 'task': handle_work_success.name, 'kwargs': {'task_actual': task_actual} }], errbacks=[{ 'task': handle_work_error.name, 'args': [task.celery_task_id], 'kwargs': {'subtasks': [task_actual] + dependencies} }], ) 
task.websocket_emit_status(task.status) # adds to on_commit connection.on_commit(post_commit)
def copy_model_obj(old_parent, new_parent, model, obj, creater, copy_name='', create_kwargs=None): fields_to_preserve = set( getattr(model, 'FIELDS_TO_PRESERVE_AT_COPY', [])) fields_to_discard = set(getattr(model, 'FIELDS_TO_DISCARD_AT_COPY', [])) m2m_to_preserve = {} o2m_to_preserve = {} create_kwargs = create_kwargs or {} for field_name in fields_to_discard: create_kwargs.pop(field_name, None) for field in model._meta.get_fields(): try: field_val = getattr(obj, field.name) except AttributeError: continue # Adjust copy blacklist fields here. if field.name in fields_to_discard or field.name in [ 'id', 'pk', 'polymorphic_ctype', 'unifiedjobtemplate_ptr', 'created_by', 'modified_by' ] or field.name.endswith('_role'): create_kwargs.pop(field.name, None) continue if field.one_to_many: if field.name in fields_to_preserve: o2m_to_preserve[field.name] = field_val elif field.many_to_many: if field.name in fields_to_preserve and not old_parent: m2m_to_preserve[field.name] = field_val elif field.many_to_one and not field_val: create_kwargs.pop(field.name, None) elif field.many_to_one and field_val == old_parent: create_kwargs[field.name] = new_parent elif field.name == 'name' and not old_parent: create_kwargs[field.name] = copy_name or field_val + ' copy' elif field.name in fields_to_preserve: create_kwargs[ field.name] = CopyAPIView._decrypt_model_field_if_needed( obj, field.name, field_val) new_obj = model.objects.create(**create_kwargs) logger.debug( six.text_type('Deep copy: Created new object {}({})').format( new_obj, model)) # Need to save separatedly because Djang-crum get_current_user would # not work properly in non-request-response-cycle context. new_obj.created_by = creater new_obj.save() from awx.main.signals import disable_activity_stream with disable_activity_stream(): for m2m in m2m_to_preserve: for related_obj in m2m_to_preserve[m2m].all(): getattr(new_obj, m2m).add(related_obj) if not old_parent: sub_objects = [] for o2m in o2m_to_preserve: for sub_obj in o2m_to_preserve[o2m].all(): sub_model = type(sub_obj) sub_objects.append( (sub_model.__module__, sub_model.__name__, sub_obj.pk)) return new_obj, sub_objects ret = {obj: new_obj} for o2m in o2m_to_preserve: for sub_obj in o2m_to_preserve[o2m].all(): ret.update( CopyAPIView.copy_model_obj(obj, new_obj, type(sub_obj), sub_obj, creater)) return ret
def start_task(self, task, instance_group, dependent_tasks=None, instance=None):
    self.dependency_graph.add_job(task)
    self.subsystem_metrics.inc(f"{self.prefix}_tasks_started", 1)
    self.start_task_limit -= 1
    if self.start_task_limit == 0:
        # schedule another run immediately after this task manager
        ScheduleTaskManager().schedule()
    from awx.main.tasks.system import handle_work_error, handle_work_success

    # update capacity for control node and execution node
    if task.controller_node:
        self.instances[task.controller_node].consume_capacity(settings.AWX_CONTROL_NODE_TASK_IMPACT)
    if task.execution_node:
        self.instances[task.execution_node].consume_capacity(task.task_impact)

    dependent_tasks = dependent_tasks or []

    task_actual = {
        'type': get_type_for_model(type(task)),
        'id': task.id,
    }
    dependencies = [{'type': get_type_for_model(type(t)), 'id': t.id} for t in dependent_tasks]

    task.status = 'waiting'

    (start_status, opts) = task.pre_start()
    if not start_status:
        task.status = 'failed'
        if task.job_explanation:
            task.job_explanation += ' '
        task.job_explanation += 'Task failed pre-start check.'
        task.save()
        # TODO: run error handler to fail sub-tasks and send notifications
    else:
        if type(task) is WorkflowJob:
            task.status = 'running'
            task.send_notification_templates('running')
            logger.debug('Transitioning %s to running status.', task.log_format)
            # Call this to ensure Workflow nodes get spawned in timely manner
            ScheduleWorkflowManager().schedule()
        # at this point we already have control/execution nodes selected for the following cases
        else:
            task.instance_group = instance_group
            execution_node_msg = f' and execution node {task.execution_node}' if task.execution_node else ''
            logger.debug(
                f'Submitting job {task.log_format} controlled by {task.controller_node} to instance group {instance_group.name}{execution_node_msg}.'
            )
        with disable_activity_stream():
            task.celery_task_id = str(uuid.uuid4())
            task.save()
            task.log_lifecycle("waiting")

    # apply_async does a NOTIFY to the channel dispatcher is listening to
    # postgres will treat this as part of the transaction, which is what we want
    if task.status != 'failed' and type(task) is not WorkflowJob:
        task_cls = task._get_task_class()
        task_cls.apply_async(
            [task.pk],
            opts,
            queue=task.get_queue_name(),
            uuid=task.celery_task_id,
            callbacks=[{'task': handle_work_success.name, 'kwargs': {'task_actual': task_actual}}],
            errbacks=[{'task': handle_work_error.name, 'args': [task.celery_task_id], 'kwargs': {'subtasks': [task_actual] + dependencies}}],
        )

    # In exception cases, like a job failing pre-start checks, we send the websocket status message
    # for jobs going into waiting, we omit this because of performance issues, as it should go to running quickly
    if task.status != 'waiting':
        task.websocket_emit_status(task.status)  # adds to on_commit
def start_task(self, task, rampart_group, dependent_tasks=None, instance=None): self.start_task_limit -= 1 if self.start_task_limit == 0: # schedule another run immediately after this task manager schedule_task_manager() from awx.main.tasks import handle_work_error, handle_work_success dependent_tasks = dependent_tasks or [] task_actual = { 'type': get_type_for_model(type(task)), 'id': task.id, } dependencies = [{ 'type': get_type_for_model(type(t)), 'id': t.id } for t in dependent_tasks] task.status = 'waiting' (start_status, opts) = task.pre_start() if not start_status: task.status = 'failed' if task.job_explanation: task.job_explanation += ' ' task.job_explanation += 'Task failed pre-start check.' task.save() # TODO: run error handler to fail sub-tasks and send notifications else: if type(task) is WorkflowJob: task.status = 'running' task.send_notification_templates('running') logger.debug('Transitioning %s to running status.', task.log_format) schedule_task_manager() elif rampart_group.is_container_group: # find one real, non-containerized instance with capacity to # act as the controller for k8s API interaction match = None for group in InstanceGroup.objects.filter( is_container_group=False): match = group.fit_task_to_most_remaining_capacity_instance( task, group.instances.all()) if match: break task.instance_group = rampart_group if match is None: logger.warn( 'No available capacity to run containerized <{}>.'. format(task.log_format)) elif task.can_run_containerized and any( ig.is_container_group for ig in task.preferred_instance_groups): task.controller_node = match.hostname else: # project updates and inventory updates don't *actually* run in pods, so # just pick *any* non-containerized host and use it as the execution node task.execution_node = match.hostname logger.debug( 'Submitting containerized {} to queue {}.'.format( task.log_format, task.execution_node)) else: task.instance_group = rampart_group if instance is not None: task.execution_node = instance.hostname logger.debug( 'Submitting {} to <instance group, instance> <{},{}>.'. format(task.log_format, task.instance_group_id, task.execution_node)) with disable_activity_stream(): task.celery_task_id = str(uuid.uuid4()) task.save() task.log_lifecycle("waiting") if rampart_group is not None: self.consume_capacity(task, rampart_group.name) def post_commit(): if task.status != 'failed' and type(task) is not WorkflowJob: # Before task is dispatched, ensure that job_event partitions exist create_partition(task.event_class._meta.db_table, start=task.created) task_cls = task._get_task_class() task_cls.apply_async( [task.pk], opts, queue=task.get_queue_name(), uuid=task.celery_task_id, callbacks=[{ 'task': handle_work_success.name, 'kwargs': { 'task_actual': task_actual } }], errbacks=[{ 'task': handle_work_error.name, 'args': [task.celery_task_id], 'kwargs': { 'subtasks': [task_actual] + dependencies } }], ) task.websocket_emit_status(task.status) # adds to on_commit connection.on_commit(post_commit)
def apply(self, project_state, schema_editor, collect_sql=False):
    from awx.main.signals import disable_activity_stream

    with disable_activity_stream():
        return Migration.apply(self, project_state, schema_editor, collect_sql)
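# Hedged sketch of how the apply() override above is typically wired up: a Migration subclass
# whose apply() wraps the parent implementation so schema and data migrations do not generate
# activity stream entries. The class name here is hypothetical; only the pattern mirrors the
# method above.
from django.db import migrations


class ActivityStreamDisabledMigration(migrations.Migration):
    def apply(self, project_state, schema_editor, collect_sql=False):
        from awx.main.signals import disable_activity_stream

        with disable_activity_stream():
            return super().apply(project_state, schema_editor, collect_sql)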