def _create_cached_program(self):
    """
    Build a program containing one course per entry in ``self.course_keys``
    and store it, plus its per-course lookups, in the cache.

    Returns the program dict that was cached.
    """
    cached_program = ProgramFactory.create()
    cached_program['courses'].extend(
        CourseFactory(id=course_key) for course_key in self.course_keys
    )

    cached_program['type'] = 'MicroBachelors'
    cached_program['type_attrs']['coaching_supported'] = True

    for course in cached_program['courses']:
        # Catalog-course UUID -> list of program UUIDs.
        catalog_course_key = CATALOG_COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_uuid=course['uuid'])
        cache.set(catalog_course_key, [cached_program['uuid']], None)

        # Only the first course run of each course is registered.
        first_run_key = course['course_runs'][0]['key']
        course_run_cache_key = COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=first_run_key)
        cache.set(course_run_cache_key, [cached_program['uuid']], None)

    # Finally store the program details themselves.
    program_cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=cached_program['uuid'])
    cache.set(program_cache_key, cached_program, None)

    return cached_program
def get_programs(site=None, uuid=None, uuids=None, course=None, organization=None):
    """Look programs up in the cache.

    The cache is populated by a management command, cache_programs.
    Exactly one of the keyword arguments must be supplied (non-None).

    Keyword Arguments:
        site (Site): django.contrib.sites.models object to fetch programs of.
        uuid (string): UUID identifying a specific program to read from the cache.
        uuids (list of string): UUIDs identifying specific programs to read from the cache.
        course (string): course id identifying a specific course run to read from the cache.
        organization (string): short name for specific organization to read from the cache.

    Returns:
        list of dict, representing programs.
        dict, if a specific program is requested.

    Raises:
        TypeError: if the number of non-None arguments is not exactly one.
    """
    supplied_count = sum(
        1 for arg in (site, uuid, uuids, course, organization) if arg is not None
    )
    if supplied_count != 1:
        raise TypeError('get_programs takes exactly one argument')

    # NOTE(review): validation counts non-None arguments, while the dispatch
    # below tests truthiness, so a falsy non-None value skips its own branch.
    if uuid:
        program_cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)
        cached_program = cache.get(program_cache_key)
        if not cached_program:
            logger.warning(missing_details_msg_tpl.format(uuid=uuid))
        return cached_program

    if course:
        course_cache_key = COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=course)
        uuids = cache.get(course_cache_key)
        if not uuids:
            # Currently, the cache does not differentiate between a cache miss and a course
            # without programs. After this is changed, log any cache misses here.
            return []
    elif site:
        configuration = getattr(site, 'configuration', None)
        catalog_url = configuration.get_value('COURSE_CATALOG_API_URL') if configuration else None
        if not (configuration and catalog_url):
            # No site configuration, or no catalog URL configured for it.
            uuids = []
        else:
            site_cache_key = SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=site.domain)
            uuids = cache.get(site_cache_key, [])
            if not uuids:
                logger.warning(
                    u'Failed to get program UUIDs from the cache for site {}.'.format(site.domain)
                )
    elif organization:
        uuids = get_programs_for_organization(organization)
        if not uuids:
            return []

    return get_programs_by_uuids(uuids)
def test_handle_programs(self):
    """
    Check that the cache_programs command requests program UUIDs and details
    and stores them in the cache.
    """
    # Ideally, this user would be created in the test setup and deleted in
    # the one test case which covers the case where the user is missing. However,
    # that deletion causes "OperationalError: no such table: wiki_attachmentrevision"
    # when run on Jenkins.
    UserFactory(username=self.catalog_integration.service_username)

    # Expected program details, keyed by the cache key they should live under.
    expected_by_key = {}
    for expected in self.programs:
        expected_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=expected['uuid'])] = expected

    self.mock_list()
    self.mock_pathways(self.pathways)

    for uuid in self.uuids:
        self.mock_detail(uuid, expected_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)])

    call_command('cache_programs')

    site_uuids_key = SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=self.site_domain)
    cached_uuids = cache.get(site_uuids_key)
    self.assertEqual(set(cached_uuids), set(self.uuids))

    cached_programs = cache.get_many(list(expected_by_key))
    # Verify that the keys were all cache hits.
    self.assertEqual(set(cached_programs), set(expected_by_key))

    # The values are dicts (unhashable), so a set comparison is not possible.
    # Every key was already shown to be a hit, so only the data itself
    # remains to be compared.
    for cache_key, cached_program in cached_programs.items():
        # cached programs have a pathways field added to them, remove before comparing
        del cached_program['pathway_ids']
        self.assertEqual(cached_program, expected_by_key[cache_key])

    # the courses in the child program's first curriculum (the active one)
    # should point to both the child program and the first program
    # in the cache.
    active_curriculum = self.child_program['curricula'][0]
    for course in active_curriculum['courses']:
        for course_run in course['course_runs']:
            course_run_cache_key = COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=course_run['key'])
            for expected_uuid in (self.programs[0]['uuid'], self.child_program['uuid']):
                self.assertIn(expected_uuid, cache.get(course_run_cache_key))
def get_courses(self, programs):
    """
    Map each course-run cache key to the UUIDs of the programs containing it.

    TODO: when course discovery can handle it, use that instead. That will allow us to put all course runs
    in the cache not just the course runs in a program. Therefore, a cache miss would be different from a
    course not in a program.

    Arguments:
        programs (dict): program dicts; only the values are read.

    Returns:
        defaultdict(list): course-run cache key -> list of program UUIDs.
    """
    programs_by_course_run = defaultdict(list)

    for program in programs.values():
        # Lazily turn each course run key of the program into its cache key.
        cache_keys = (
            COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=run_key)
            for run_key in course_run_keys_for_program(program)
        )
        for cache_key in cache_keys:
            programs_by_course_run[cache_key].append(program['uuid'])

    return programs_by_course_run
def test_get_from_course(self, mock_warning, _mock_info):
    """
    get_programs(course=...) returns [] before anything is cached for the
    course run, and the cached program afterwards, without logging a warning.
    """
    program = ProgramFactory()
    first_course = program['courses'][0]
    course_run_key = first_course['course_runs'][0]['key']

    # Nothing is cached for this course run yet.
    self.assertEqual(get_programs(course=course_run_key), [])

    # Cache the course-run -> program UUIDs lookup, then the program details.
    course_run_cache_key = COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=course_run_key)
    cache.set(course_run_cache_key, [program['uuid']], None)
    program_cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=program['uuid'])
    cache.set(program_cache_key, program, None)

    fetched = get_programs(course=course_run_key)
    self.assertEqual(fetched, [program])
    self.assertFalse(mock_warning.called)
def get_courses(self, programs):
    """
    Map each course-run cache key to the UUIDs of the programs containing it.

    TODO: when course discovery can handle it, use that instead. That will allow us to put all course runs
    in the cache not just the course runs in a program. Therefore, a cache miss would be different from a
    course not in a program.

    Arguments:
        programs (dict): program dicts; only the values are read.

    Returns:
        tuple: (defaultdict mapping course-run cache key -> list of program
        UUIDs, failure flag). The flag is always False in this implementation.
    """
    programs_by_course_run = defaultdict(list)

    for program in programs.values():
        # Flatten courses -> course runs for this program; evaluated lazily.
        run_keys = (
            course_run['key']
            for course in program['courses']
            for course_run in course['course_runs']
        )
        for run_key in run_keys:
            cache_key = COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=run_key)
            programs_by_course_run[cache_key].append(program['uuid'])

    return programs_by_course_run, False
def test_get_from_course(self, mock_warning, _mock_info):
    """
    Reading programs by course run yields an empty list until both the
    course-run lookup and the program details are cached; no warning is logged.
    """
    cached_program = ProgramFactory()
    program_uuid = cached_program['uuid']
    run_key = cached_program['courses'][0]['course_runs'][0]['key']

    # Before anything is cached the lookup comes back empty.
    programs_before = get_programs(course=run_key)
    self.assertEqual(programs_before, [])

    entries = (
        (COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=run_key), [program_uuid]),
        (PROGRAM_CACHE_KEY_TPL.format(uuid=program_uuid), cached_program),
    )
    for cache_key, value in entries:
        cache.set(cache_key, value, None)

    programs_after = get_programs(course=run_key)
    self.assertEqual(programs_after, [cached_program])
    self.assertFalse(mock_warning.called)
def get_programs(site=None, uuid=None, uuids=None, course=None):  # pylint: disable=redefined-outer-name
    """Look programs up in the cache.

    The cache is populated by a management command, cache_programs.
    Exactly one of the keyword arguments must be supplied (non-None).

    Keyword Arguments:
        site (Site): django.contrib.sites.models object
        uuid (string): UUID identifying a specific program to read from the cache.
        uuids (list of string): UUIDs identifying specific programs to read from the cache.
        course (string): course id identifying a specific course run to read from the cache.

    Returns:
        list of dict, representing programs.
        dict, if a specific program is requested.

    Raises:
        TypeError: if the number of non-None arguments is not exactly one.
    """
    supplied_count = sum(1 for arg in (site, uuid, uuids, course) if arg is not None)
    if supplied_count != 1:
        raise TypeError('get_programs takes exactly one argument')

    if uuid:
        # Single-program lookup: return whatever the cache holds, warning on a miss.
        cached_program = cache.get(PROGRAM_CACHE_KEY_TPL.format(uuid=uuid))
        if not cached_program:
            logger.warning(missing_details_msg_tpl.format(uuid=uuid))
        return cached_program

    if course:
        course_cache_key = COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=course)
        uuids = cache.get(course_cache_key)
        if not uuids:
            # Currently, the cache does not differentiate between a cache miss and a course
            # without programs. After this is changed, log any cache misses here.
            return []
    elif site:
        site_cache_key = SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=site.domain)
        uuids = cache.get(site_cache_key, [])
        if not uuids:
            logger.warning(
                u'Failed to get program UUIDs from the cache for site {}.'.format(site.domain)
            )

    return get_programs_by_uuids(uuids)
def get_programs(site=None, uuid=None, uuids=None, course=None):  # pylint: disable=redefined-outer-name
    """Fetch cached programs by site, program UUID(s) or course run.

    The cache is populated by a management command, cache_programs.
    Callers must pass exactly one non-None keyword argument.

    Keyword Arguments:
        site (Site): django.contrib.sites.models object
        uuid (string): UUID identifying a specific program to read from the cache.
        uuids (list of string): UUIDs identifying specific programs to read from the cache.
        course (string): course id identifying a specific course run to read from the cache.

    Returns:
        list of dict, representing programs.
        dict, if a specific program is requested.

    Raises:
        TypeError: if the number of non-None arguments is not exactly one.
    """
    provided = [arg for arg in (site, uuid, uuids, course) if arg is not None]
    if len(provided) != 1:
        raise TypeError('get_programs takes exactly one argument')

    if uuid:
        program_details = cache.get(PROGRAM_CACHE_KEY_TPL.format(uuid=uuid))
        if not program_details:
            logger.warning(missing_details_msg_tpl.format(uuid=uuid))
        return program_details

    if course:
        uuids = cache.get(COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=course))
        if not uuids:
            # Currently, the cache does not differentiate between a cache miss and a course
            # without programs. After this is changed, log any cache misses here.
            return []
    elif site:
        domain_cache_key = SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=site.domain)
        uuids = cache.get(domain_cache_key, [])
        if not uuids:
            warning_message = u'Failed to get program UUIDs from the cache for site {}.'.format(site.domain)
            logger.warning(warning_message)

    # Reached with the caller's own ``uuids`` or with the list resolved above.
    return get_programs_by_uuids(uuids)
def get_programs(site=None, uuid=None, course=None):  # pylint: disable=redefined-outer-name
    """Look programs up in the cache.

    The cache is populated by a management command, cache_programs.
    Exactly one of the keyword arguments must be supplied (non-None).

    Keyword Arguments:
        site (Site): django.contrib.sites.models object
        uuid (string): UUID identifying a specific program to read from the cache.
        course (string): course id identifying a specific course run to read from the cache.

    Returns:
        list of dict, representing programs.
        dict, if a specific program is requested.

    Raises:
        TypeError: if the number of non-None arguments is not exactly one.
    """
    supplied_count = sum(1 for arg in (site, uuid, course) if arg is not None)
    if supplied_count != 1:
        raise TypeError('get_programs takes exactly one argument')

    missing_details_msg_tpl = u'Failed to get details for program {uuid} from the cache.'

    if uuid:
        cached_program = cache.get(PROGRAM_CACHE_KEY_TPL.format(uuid=uuid))
        if not cached_program:
            logger.warning(missing_details_msg_tpl.format(uuid=uuid))
        return cached_program

    if course:
        uuids = cache.get(COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=course))
        if not uuids:
            # Currently, the cache does not differentiate between a cache miss and a course
            # without programs. After this is changed, log any cache misses here.
            return []
    else:
        # Neither uuid nor course was truthy, so read the site's program UUIDs.
        uuids = cache.get(SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=site.domain), [])
        if not uuids:
            logger.warning(
                u'Failed to get program UUIDs from the cache for site {}.'.format(site.domain)
            )

    detail_keys = [PROGRAM_CACHE_KEY_TPL.format(uuid=program_uuid) for program_uuid in uuids]
    programs = list(cache.get_many(detail_keys).values())

    # The get_many above sometimes fails to bring back details cached on one or
    # more Memcached nodes. It doesn't look like these keys are being evicted.
    # 99% of the time all keys come back, but 1% of the time all the keys stored
    # on one or more nodes are missing from the result of the get_many. One
    # get_many may fail to bring these keys back, but a get_many occurring
    # immediately afterwards will succeed in bringing back all the keys. This
    # behavior can be mitigated by trying again for the missing keys, which is
    # what we do here. Splitting the get_many into smaller chunks may also help.
    missing_uuids = set(uuids) - {found['uuid'] for found in programs}
    if not missing_uuids:
        return programs

    logger.info(
        u'Failed to get details for {count} programs. Retrying.'.format(count=len(missing_uuids))
    )

    retry_keys = [PROGRAM_CACHE_KEY_TPL.format(uuid=program_uuid) for program_uuid in missing_uuids]
    programs += list(cache.get_many(retry_keys).values())

    # Whatever is still absent after the single retry is reported per UUID.
    still_missing_uuids = set(uuids) - {found['uuid'] for found in programs}
    for absent_uuid in still_missing_uuids:
        logger.warning(missing_details_msg_tpl.format(uuid=absent_uuid))

    return programs
def test_handle_programs(self):
    """
    Check that the cache_programs command requests program UUIDs and details
    and stores them, along with the derived lookups, in the cache.
    """
    # Ideally, this user would be created in the test setup and deleted in
    # the one test case which covers the case where the user is missing. However,
    # that deletion causes "OperationalError: no such table: wiki_attachmentrevision"
    # when run on Jenkins.
    UserFactory(username=self.catalog_integration.service_username)

    # Expected program details, keyed by the cache key they should live under.
    expected_by_key = {}
    for expected in self.programs:
        expected_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=expected['uuid'])] = expected

    self.mock_list()
    self.mock_pathways(self.pathways)

    for uuid in self.uuids:
        self.mock_detail(uuid, expected_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=uuid)])

    call_command('cache_programs')

    site_uuids_key = SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=self.site_domain)
    cached_uuids = cache.get(site_uuids_key)
    assert set(cached_uuids) == set(self.uuids)

    cached_programs = cache.get_many(list(expected_by_key))
    # Verify that the keys were all cache hits.
    assert set(cached_programs) == set(expected_by_key)

    # The values are dicts (unhashable), so a set comparison is not possible.
    # Every key was already shown to be a hit, so only the data itself
    # remains to be compared.
    for cache_key, cached_program in cached_programs.items():
        # cached programs have a pathways field added to them, remove before comparing
        del cached_program['pathway_ids']
        assert cached_program == expected_by_key[cache_key]

    # the courses in the child program's first curriculum (the active one)
    # should point to both the child program and the first program
    # in the cache.
    active_curriculum = self.child_program['curricula'][0]
    for course in active_curriculum['courses']:
        for course_run in course['course_runs']:
            course_run_cache_key = COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=course_run['key'])
            for expected_uuid in (self.programs[0]['uuid'], self.child_program['uuid']):
                assert expected_uuid in cache.get(course_run_cache_key)

    # for each program, assert that the program's UUID is in a cached list of
    # program UUIDS by program type and a cached list of UUIDs by authoring organization
    for program in self.programs:
        program_type = normalize_program_type(program.get('type', 'None'))
        program_type_slug = program.get('type_attrs', {}).get('slug')
        type_cache_key = PROGRAMS_BY_TYPE_CACHE_KEY_TPL.format(
            site_id=self.site.id, program_type=program_type
        )
        slug_cache_key = PROGRAMS_BY_TYPE_SLUG_CACHE_KEY_TPL.format(
            site_id=self.site.id, program_slug=program_type_slug
        )
        for lookup_key in (type_cache_key, slug_cache_key):
            assert program['uuid'] in cache.get(lookup_key)

        for organization in program['authoring_organizations']:
            org_cache_key = PROGRAMS_BY_ORGANIZATION_CACHE_KEY_TPL.format(org_key=organization['key'])
            assert program['uuid'] in cache.get(org_cache_key)
def get_programs(site=None, uuid=None, course=None):  # pylint: disable=redefined-outer-name
    """Fetch cached programs by site, program UUID or course run.

    The cache is populated by a management command, cache_programs.
    Callers must pass exactly one non-None keyword argument.

    Keyword Arguments:
        site (Site): django.contrib.sites.models object
        uuid (string): UUID identifying a specific program to read from the cache.
        course (string): course id identifying a specific course run to read from the cache.

    Returns:
        list of dict, representing programs.
        dict, if a specific program is requested.

    Raises:
        TypeError: if the number of non-None arguments is not exactly one.
    """
    provided = [arg for arg in (site, uuid, course) if arg is not None]
    if len(provided) != 1:
        raise TypeError('get_programs takes exactly one argument')

    missing_details_msg_tpl = u'Failed to get details for program {uuid} from the cache.'

    def _details_keys(program_uuids):
        # Cache keys under which the details of the given programs are stored.
        return [PROGRAM_CACHE_KEY_TPL.format(uuid=program_uuid) for program_uuid in program_uuids]

    if uuid:
        program_details = cache.get(PROGRAM_CACHE_KEY_TPL.format(uuid=uuid))
        if not program_details:
            logger.warning(missing_details_msg_tpl.format(uuid=uuid))
        return program_details
    elif course:
        course_cache_key = COURSE_PROGRAMS_CACHE_KEY_TPL.format(course_run_id=course)
        uuids = cache.get(course_cache_key)
        if not uuids:
            # Currently, the cache does not differentiate between a cache miss and a course
            # without programs. After this is changed, log any cache misses here.
            return []
    else:
        # Neither uuid nor course was truthy, so read the site's program UUIDs.
        site_cache_key = SITE_PROGRAM_UUIDS_CACHE_KEY_TPL.format(domain=site.domain)
        uuids = cache.get(site_cache_key, [])
        if not uuids:
            logger.warning(
                u'Failed to get program UUIDs from the cache for site {}.'.format(site.domain)
            )

    first_batch = cache.get_many(_details_keys(uuids))
    programs = list(first_batch.values())

    # The get_many above sometimes fails to bring back details cached on one or
    # more Memcached nodes. It doesn't look like these keys are being evicted.
    # 99% of the time all keys come back, but 1% of the time all the keys stored
    # on one or more nodes are missing from the result of the get_many. One
    # get_many may fail to bring these keys back, but a get_many occurring
    # immediately afterwards will succeed in bringing back all the keys. This
    # behavior can be mitigated by trying again for the missing keys, which is
    # what we do here. Splitting the get_many into smaller chunks may also help.
    missing_uuids = set(uuids) - {entry['uuid'] for entry in programs}
    if missing_uuids:
        logger.info(
            u'Failed to get details for {count} programs. Retrying.'.format(count=len(missing_uuids))
        )

        second_batch = cache.get_many(_details_keys(missing_uuids))
        programs += list(second_batch.values())

        # Report every program that the single retry did not recover.
        still_missing_uuids = set(uuids) - {entry['uuid'] for entry in programs}
        for absent_uuid in still_missing_uuids:
            logger.warning(missing_details_msg_tpl.format(uuid=absent_uuid))

    return programs