Ejemplo n.º 1
0
    def test_get_many(self, mock_warning, mock_info):
        """
        Walk get_pathways(site) through three cache states: no id list cached,
        id list cached with one pathway's details missing, and everything cached.
        """
        created = PathwayFactory.create_batch(3)

        # Cache details for only the first 2 of the 3 pathways.
        partial_pathways = {}
        for item in created[:2]:
            partial_pathways[PATHWAY_CACHE_KEY_TPL.format(id=item['id'])] = item
        cache.set_many(partial_pathways, None)

        # No id list is cached for the site yet, so the result should be an
        # empty list and a warning should be logged.
        assert get_pathways(self.site) == []
        mock_warning.assert_called_once_with('Failed to get credit pathway ids from the cache.')
        mock_warning.reset_mock()

        # Cache the ids of all 3 pathways for this site.
        site_ids_key = SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site.domain)
        cache.set(site_ids_key, [item['id'] for item in created], None)

        actual_pathways = get_pathways(self.site)

        # Only the 2 pathways with cached details come back; the third one
        # triggers the retry info message and then a warning.
        returned_ids = {item['id'] for item in actual_pathways}
        partial_ids = {item['id'] for item in partial_pathways.values()}
        assert returned_ids == partial_ids
        mock_info.assert_called_with('Failed to get details for 1 pathways. Retrying.')
        mock_warning.assert_called_with(
            'Failed to get details for credit pathway {id} from the cache.'.format(id=created[2]['id'])
        )
        mock_warning.reset_mock()

        # The pathways are dicts and therefore unhashable, so compare them one
        # by one against what was cached. The id check above already showed
        # that the right pathways were returned.
        for item in actual_pathways:
            assert item == partial_pathways[PATHWAY_CACHE_KEY_TPL.format(id=item['id'])]

        # Now cache details for all 3 pathways.
        all_pathways = {}
        for item in created:
            all_pathways[PATHWAY_CACHE_KEY_TPL.format(id=item['id'])] = item
        cache.set_many(all_pathways, None)

        actual_pathways = get_pathways(self.site)

        # Every pathway should be returned and nothing should be warned about.
        returned_ids = {item['id'] for item in actual_pathways}
        all_ids = {item['id'] for item in all_pathways.values()}
        assert returned_ids == all_ids
        assert not mock_warning.called

        for item in actual_pathways:
            assert item == all_pathways[PATHWAY_CACHE_KEY_TPL.format(id=item['id'])]
Ejemplo n.º 2
0
def get_pathways(site, pathway_id=None):
    """
    Read pathways from the cache.
    The cache is populated by a management command, cache_programs.

    Arguments:
        site (Site): django.contrib.sites.models object

    Keyword Arguments:
        pathway_id (string): id identifying a specific pathway to read from the cache.

    Returns:
        list of dict, representing pathways. This is always a real list
        (possibly empty), never a dict view.
        dict, if a specific pathway is requested. When that pathway is not
        cached, the cache's miss value is returned and a warning is logged.
    """
    missing_details_msg_tpl = 'Failed to get details for credit pathway {id} from the cache.'

    if pathway_id:
        pathway = cache.get(PATHWAY_CACHE_KEY_TPL.format(id=pathway_id))
        if not pathway:
            logger.warning(missing_details_msg_tpl.format(id=pathway_id))

        return pathway

    pathway_ids = cache.get(
        SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=site.domain), [])
    if not pathway_ids:
        logger.warning('Failed to get credit pathway ids from the cache.')

    cached_details = cache.get_many([
        PATHWAY_CACHE_KEY_TPL.format(id=cached_id)
        for cached_id in pathway_ids
    ])
    # Materialize the values into a list. On Python 3, dict.values() is a view
    # object: it cannot be extended with `+=` and never compares equal to a
    # list, which would break both the retry below and callers expecting [].
    pathways = list(cached_details.values())

    # The get_many above sometimes fails to bring back details cached on one or
    # more Memcached nodes. It doesn't look like these keys are being evicted.
    # 99% of the time all keys come back, but 1% of the time all the keys stored
    # on one or more nodes are missing from the result of the get_many. One
    # get_many may fail to bring these keys back, but a get_many occurring
    # immediately afterwards will succeed in bringing back all the keys. This
    # behavior can be mitigated by trying again for the missing keys, which is
    # what we do here. Splitting the get_many into smaller chunks may also help.
    missing_ids = set(pathway_ids) - {pathway['id'] for pathway in pathways}
    if missing_ids:
        logger.info(
            'Failed to get details for {count} pathways. Retrying.'.format(
                count=len(missing_ids)))

        retried_pathways = cache.get_many([
            PATHWAY_CACHE_KEY_TPL.format(id=missing_id)
            for missing_id in missing_ids
        ])
        pathways.extend(retried_pathways.values())

        # Anything still absent after the single retry is reported per id.
        still_missing_ids = set(pathway_ids) - {pathway['id']
                                                for pathway in pathways}
        for missing_id in still_missing_ids:
            logger.warning(missing_details_msg_tpl.format(id=missing_id))

    return pathways
Ejemplo n.º 3
0
    def test_get_many_with_missing(self, mock_cache, mock_warning, mock_info):
        """
        Check that get_pathways recovers a pathway missing from the first
        get_many by retrying, without logging any warning.
        """
        created = PathwayFactory.create_batch(3)

        all_pathways = {}
        for item in created:
            all_pathways[PATHWAY_CACHE_KEY_TPL.format(id=item['id'])] = item

        partial_pathways = {}
        for item in created[:2]:
            partial_pathways[PATHWAY_CACHE_KEY_TPL.format(id=item['id'])] = item

        def fake_get_many(keys):
            # Any request that is not for exactly one key gets the partial
            # mapping; a single-key request gets the last pathway.
            if len(keys) != 1:
                return partial_pathways
            last = created[-1]
            return {PATHWAY_CACHE_KEY_TPL.format(id=last['id']): last}

        mock_cache.get.return_value = [item['id'] for item in created]
        mock_cache.get_many.side_effect = fake_get_many

        actual_pathways = get_pathways(self.site)

        # All 3 pathways should come back: the one absent from the first
        # lookup is only reported at info level, and the results of both
        # lookups are combined.
        returned_ids = set(item['id'] for item in actual_pathways)
        expected_ids = set(item['id'] for item in all_pathways.values())
        self.assertEqual(returned_ids, expected_ids)
        self.assertFalse(mock_warning.called)
        mock_info.assert_called_with('Failed to get details for 1 pathways. Retrying.')

        for item in actual_pathways:
            self.assertEqual(item, all_pathways[PATHWAY_CACHE_KEY_TPL.format(id=item['id'])])
Ejemplo n.º 4
0
def get_pathways(site, pathway_id=None):
    """
    Read pathways from the cache.
    The cache is populated by a management command, cache_programs.

    Arguments:
        site (Site): django.contrib.sites.models object

    Keyword Arguments:
        pathway_id (string): id identifying a specific pathway to read from the cache.

    Returns:
        list of dict, representing pathways. This is always a real list
        (possibly empty), never a dict view.
        dict, if a specific pathway is requested. When that pathway is not
        cached, the cache's miss value is returned and a warning is logged.
    """
    missing_details_msg_tpl = 'Failed to get details for credit pathway {id} from the cache.'

    if pathway_id:
        pathway = cache.get(PATHWAY_CACHE_KEY_TPL.format(id=pathway_id))
        if not pathway:
            logger.warning(missing_details_msg_tpl.format(id=pathway_id))

        return pathway

    pathway_ids = cache.get(SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=site.domain), [])
    if not pathway_ids:
        logger.warning('Failed to get credit pathway ids from the cache.')

    cached_details = cache.get_many([PATHWAY_CACHE_KEY_TPL.format(id=cached_id) for cached_id in pathway_ids])
    # dict.values() is a view on Python 3; it supports neither `+=` nor
    # equality with a list, so turn it into a list before it is extended below
    # or handed back to callers.
    pathways = list(cached_details.values())

    # The get_many above sometimes fails to bring back details cached on one or
    # more Memcached nodes. It doesn't look like these keys are being evicted.
    # 99% of the time all keys come back, but 1% of the time all the keys stored
    # on one or more nodes are missing from the result of the get_many. One
    # get_many may fail to bring these keys back, but a get_many occurring
    # immediately afterwards will succeed in bringing back all the keys. This
    # behavior can be mitigated by trying again for the missing keys, which is
    # what we do here. Splitting the get_many into smaller chunks may also help.
    missing_ids = set(pathway_ids) - {pathway['id'] for pathway in pathways}
    if missing_ids:
        logger.info(
            'Failed to get details for {count} pathways. Retrying.'.format(count=len(missing_ids))
        )

        retried_pathways = cache.get_many(
            [PATHWAY_CACHE_KEY_TPL.format(id=missing_id) for missing_id in missing_ids]
        )
        pathways.extend(retried_pathways.values())

        # Anything still absent after the single retry is reported per id.
        still_missing_ids = set(pathway_ids) - {pathway['id'] for pathway in pathways}
        for missing_id in still_missing_ids:
            logger.warning(missing_details_msg_tpl.format(id=missing_id))

    return pathways
Ejemplo n.º 5
0
    def process_pathways(self, site, pathways, programs):
        """
        For each program, add references to each pathway it is a part of.
        For each pathway, replace the "programs" dict with "program_uuids",
        which only contains uuids (since program data is already cached)

        Arguments:
            site: object whose ``domain`` attribute is used in the failure log message.
            pathways (iterable of dict): each needs an 'id' and a 'programs'
                list of dicts carrying a 'uuid'. The dicts are modified in place.
            programs (dict): keyed by PROGRAM_CACHE_KEY_TPL-formatted keys; each
                value's 'pathway_ids' list is appended to in place.

        Returns:
            tuple: (processed_pathways, programs, failure). processed_pathways
            maps PATHWAY_CACHE_KEY_TPL-formatted keys to pathway dicts, and
            failure is True if processing any pathway raised.
        """
        processed_pathways = {}
        failure = False
        for pathway in pathways:
            try:
                pathway_id = pathway['id']
                pathway_cache_key = PATHWAY_CACHE_KEY_TPL.format(id=pathway_id)
                # The pathway is recorded before its programs are processed, so
                # it stays in the result even if an error occurs further down.
                processed_pathways[pathway_cache_key] = pathway
                uuids = []

                for program in pathway['programs']:
                    program_uuid = program['uuid']
                    program_cache_key = PROGRAM_CACHE_KEY_TPL.format(uuid=program_uuid)
                    programs[program_cache_key]['pathway_ids'].append(pathway_id)
                    uuids.append(program_uuid)

                del pathway['programs']
                pathway['program_uuids'] = uuids
            except Exception:  # pylint: disable=broad-except
                # Best effort: one bad pathway must not stop the others.
                # Catching Exception rather than everything lets
                # KeyboardInterrupt and SystemExit propagate, and
                # logger.exception keeps the traceback.
                logger.exception('Failed to process pathways for {domain}'.format(domain=site.domain))
                failure = True
        return processed_pathways, programs, failure
Ejemplo n.º 6
0
    def process_pathways(self, site, pathways, programs):
        """
        For each program, add references to each pathway it is a part of.
        For each pathway, replace the "programs" dict with "program_uuids",
        which only contains uuids (since program data is already cached)

        Arguments:
            site: object whose ``domain`` attribute is used in the failure log message.
            pathways (iterable of dict): each needs an 'id' and a 'programs'
                list of dicts carrying a 'uuid'. The dicts are modified in place.
            programs (dict): keyed by PROGRAM_CACHE_KEY_TPL-formatted keys; each
                value's 'pathway_ids' list is appended to in place.

        Returns:
            tuple: (processed_pathways, programs, failure). processed_pathways
            maps PATHWAY_CACHE_KEY_TPL-formatted keys to pathway dicts, and
            failure is True if processing any pathway raised.
        """
        processed_pathways = {}
        failure = False
        for pathway in pathways:
            try:
                pathway_id = pathway['id']
                pathway_cache_key = PATHWAY_CACHE_KEY_TPL.format(id=pathway_id)
                # The pathway is recorded before its programs are processed, so
                # it stays in the result even if an error occurs further down.
                processed_pathways[pathway_cache_key] = pathway
                uuids = []

                for program in pathway['programs']:
                    program_uuid = program['uuid']
                    program_cache_key = PROGRAM_CACHE_KEY_TPL.format(
                        uuid=program_uuid)
                    programs[program_cache_key]['pathway_ids'].append(
                        pathway_id)
                    uuids.append(program_uuid)

                del pathway['programs']
                pathway['program_uuids'] = uuids
            except Exception:  # pylint: disable=broad-except
                # Best effort: one bad pathway must not stop the others.
                # Catching Exception rather than everything lets
                # KeyboardInterrupt and SystemExit propagate.
                logger.exception(
                    f'Failed to process pathways for {site.domain}')
                failure = True
        return processed_pathways, programs, failure
Ejemplo n.º 7
0
 def fake_get_many(keys):
     """
     Return the partial mapping unless exactly one key is requested, in
     which case return a one-entry mapping for the last pathway.
     """
     if len(keys) != 1:
         return partial_pathways
     last_pathway = pathways[-1]
     return {PATHWAY_CACHE_KEY_TPL.format(id=last_pathway['id']): last_pathway}
Ejemplo n.º 8
0
    def test_pathways_multiple_pages(self):
        """
        Verify that the command properly caches credit pathways when its
        endpoint returns them across multiple pages.
        """
        UserFactory(username=self.catalog_integration.service_username)
        extra_pathways = PathwayFactory.create_batch(40)
        for extra in extra_pathways:
            extra['programs'] = []
        combined = self.pathways + extra_pathways

        programs_by_key = {}
        for prog in self.programs:
            programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=prog['uuid'])] = prog

        self.mock_list()
        for program_uuid in self.uuids:
            detail = programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=program_uuid)]
            self.mock_detail(program_uuid, detail)

        # Register the 3 pages of credit pathways, last page first.
        self.mock_pathways(combined[40:], page_number=3, final=True)
        self.mock_pathways(combined[20:40], page_number=2, final=False)
        self.mock_pathways(combined[:20], page_number=1, final=False)

        call_command('cache_programs')

        expected_by_key = {}
        for item in combined:
            expected_by_key[PATHWAY_CACHE_KEY_TPL.format(id=item['id'])] = item
        expected_keys = list(expected_by_key)

        site_ids_key = SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site_domain)
        cached_pathway_keys = cache.get(site_ids_key)
        self.assertEqual(set(cached_pathway_keys), set(expected_keys))

        cached_pathways = cache.get_many(expected_keys)
        self.assertEqual(set(cached_pathways), set(expected_by_key))

        # The pathway values are dicts and so unhashable, which rules out a set
        # comparison. The key check above already showed that every pathway
        # came out of the cache, so only the data itself is left to verify.
        for key, cached in cached_pathways.items():
            # The cache stores program uuids rather than full programs, so
            # reshape the expected pathway the same way before comparing.
            expected = expected_by_key[key]
            expected['program_uuids'] = [prog['uuid'] for prog in expected['programs']]
            del expected['programs']

            self.assertEqual(cached, expected)
    def test_pathways_multiple_pages(self):
        """
        Verify that the command properly caches credit pathways when its
        endpoint returns them across multiple pages.
        """
        UserFactory(username=self.catalog_integration.service_username)
        added = PathwayFactory.create_batch(40)
        for added_pathway in added:
            added_pathway['programs'] = []
        every_pathway = self.pathways + added

        program_lookup = {}
        for entry in self.programs:
            program_lookup[PROGRAM_CACHE_KEY_TPL.format(uuid=entry['uuid'])] = entry

        self.mock_list()
        for program_uuid in self.uuids:
            self.mock_detail(program_uuid, program_lookup[PROGRAM_CACHE_KEY_TPL.format(uuid=program_uuid)])

        # Mock the 3 pages of credit pathways, beginning with the final page.
        self.mock_pathways(every_pathway[40:], page_number=3, final=True)
        self.mock_pathways(every_pathway[20:40], page_number=2, final=False)
        self.mock_pathways(every_pathway[:20], page_number=1, final=False)

        call_command('cache_programs')

        pathway_lookup = {}
        for entry in every_pathway:
            pathway_lookup[PATHWAY_CACHE_KEY_TPL.format(id=entry['id'])] = entry
        wanted_keys = list(pathway_lookup)

        cached_pathway_keys = cache.get(
            SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site_domain)
        )
        self.assertEqual(set(cached_pathway_keys), set(wanted_keys))

        cached_pathways = cache.get_many(wanted_keys)
        self.assertEqual(set(cached_pathways), set(pathway_lookup))

        # Dict values are unhashable, so they cannot go into a set. The key
        # comparison above established that all pathways were cached; here
        # each cached pathway's content is checked individually.
        for key, cached_value in cached_pathways.items():
            # Cached pathways hold only program uuids, so convert the expected
            # pathway to that shape before comparing.
            wanted = pathway_lookup[key]
            wanted['program_uuids'] = [entry['uuid'] for entry in wanted['programs']]
            del wanted['programs']

            self.assertEqual(cached_value, wanted)
Ejemplo n.º 10
0
    def test_get_one(self, mock_warning, _mock_info):
        """
        Check a single-pathway lookup: None plus a warning before the pathway
        is cached, the pathway itself and no warning afterwards.
        """
        expected_pathway = PathwayFactory()
        expected_id = expected_pathway['id']
        missing_message = u'Failed to get details for credit pathway {id} from the cache.'.format(
            id=expected_id)

        # Nothing is cached yet.
        result_before_caching = get_pathways(self.site, pathway_id=expected_id)
        self.assertEqual(result_before_caching, None)
        mock_warning.assert_called_once_with(missing_message)
        mock_warning.reset_mock()

        pathway_key = PATHWAY_CACHE_KEY_TPL.format(id=expected_id)
        cache.set(pathway_key, expected_pathway, None)

        # The same lookup now finds the cached pathway.
        self.assertEqual(get_pathways(self.site, pathway_id=expected_id), expected_pathway)
        self.assertFalse(mock_warning.called)
Ejemplo n.º 11
0
    def test_get_one(self, mock_warning, _mock_info):
        """
        Check a single-pathway lookup: None plus a warning before the pathway
        is cached, the pathway itself and no warning afterwards.
        """
        expected_pathway = PathwayFactory()
        expected_id = expected_pathway['id']

        # Nothing is cached yet.
        result_before_caching = get_pathways(self.site, pathway_id=expected_id)
        assert result_before_caching is None
        mock_warning.assert_called_once_with(f'Failed to get details for credit pathway {expected_id} from the cache.')
        mock_warning.reset_mock()

        pathway_key = PATHWAY_CACHE_KEY_TPL.format(id=expected_id)
        cache.set(pathway_key, expected_pathway, None)

        # The same lookup now finds the cached pathway.
        result_after_caching = get_pathways(self.site, pathway_id=expected_id)
        assert result_after_caching == expected_pathway
        assert not mock_warning.called
Ejemplo n.º 12
0
    def test_handle_pathways(self):
        """
        Verify that the command requests and caches credit pathways
        """
        UserFactory(username=self.catalog_integration.service_username)

        programs_by_key = {}
        for prog in self.programs:
            programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=prog['uuid'])] = prog

        expected_by_key = {}
        for item in self.pathways:
            expected_by_key[PATHWAY_CACHE_KEY_TPL.format(id=item['id'])] = item

        self.mock_list()
        self.mock_pathways(self.pathways)

        for program_uuid in self.uuids:
            detail = programs_by_key[PROGRAM_CACHE_KEY_TPL.format(uuid=program_uuid)]
            self.mock_detail(program_uuid, detail)

        call_command('cache_programs')

        site_ids_key = SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site_domain)
        cached_pathway_keys = cache.get(site_ids_key)
        expected_keys = list(expected_by_key)
        self.assertEqual(set(cached_pathway_keys), set(expected_keys))

        cached_pathways = cache.get_many(expected_keys)
        self.assertEqual(set(cached_pathways), set(expected_by_key))

        # The pathway values are dicts and so unhashable, which rules out a set
        # comparison. The key check above already showed that every pathway
        # came out of the cache, so only the data itself is left to verify.
        for key, cached in cached_pathways.items():
            # The cache stores program uuids rather than full programs, so
            # reshape the expected pathway the same way before comparing.
            expected = expected_by_key[key]
            expected['program_uuids'] = [prog['uuid'] for prog in expected['programs']]
            del expected['programs']

            self.assertEqual(cached, expected)
    def test_handle_pathways(self):
        """
        Verify that the command requests and caches credit pathways
        """
        UserFactory(username=self.catalog_integration.service_username)

        program_lookup = {}
        for entry in self.programs:
            program_lookup[PROGRAM_CACHE_KEY_TPL.format(uuid=entry['uuid'])] = entry

        pathway_lookup = {}
        for entry in self.pathways:
            pathway_lookup[PATHWAY_CACHE_KEY_TPL.format(id=entry['id'])] = entry

        self.mock_list()
        self.mock_pathways(self.pathways)

        for program_uuid in self.uuids:
            self.mock_detail(program_uuid, program_lookup[PROGRAM_CACHE_KEY_TPL.format(uuid=program_uuid)])

        call_command('cache_programs')

        cached_pathway_keys = cache.get(
            SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site_domain)
        )
        wanted_keys = list(pathway_lookup)
        self.assertEqual(set(cached_pathway_keys), set(wanted_keys))

        cached_pathways = cache.get_many(wanted_keys)
        self.assertEqual(set(cached_pathways), set(pathway_lookup))

        # Dict values are unhashable, so they cannot go into a set. The key
        # comparison above established that all pathways were cached; here
        # each cached pathway's content is checked individually.
        for key, cached_value in cached_pathways.items():
            # Cached pathways hold only program uuids, so convert the expected
            # pathway to that shape before comparing.
            wanted = pathway_lookup[key]
            wanted['program_uuids'] = [entry['uuid'] for entry in wanted['programs']]
            del wanted['programs']

            self.assertEqual(cached_value, wanted)
Ejemplo n.º 14
0
    def test_get_one(self, mock_warning, _mock_info):
        """
        Check a single-pathway lookup: None plus a warning before the pathway
        is cached, the pathway itself and no warning afterwards.
        """
        expected_pathway = PathwayFactory()
        expected_id = expected_pathway['id']
        missing_message = 'Failed to get details for credit pathway {id} from the cache.'.format(id=expected_id)

        # Nothing is cached yet.
        result_before_caching = get_pathways(self.site, pathway_id=expected_id)
        self.assertEqual(result_before_caching, None)
        mock_warning.assert_called_once_with(missing_message)
        mock_warning.reset_mock()

        pathway_key = PATHWAY_CACHE_KEY_TPL.format(id=expected_id)
        cache.set(pathway_key, expected_pathway, None)

        # The same lookup now finds the cached pathway.
        result_after_caching = get_pathways(self.site, pathway_id=expected_id)
        self.assertEqual(result_after_caching, expected_pathway)
        self.assertFalse(mock_warning.called)
Ejemplo n.º 15
0
 def fake_get_many(keys):
     """
     Return a one-entry mapping for the last pathway when exactly one key
     is requested; otherwise return the partial mapping.
     """
     single_key_request = len(keys) == 1
     if not single_key_request:
         return partial_pathways
     final_pathway = pathways[-1]
     return {PATHWAY_CACHE_KEY_TPL.format(id=final_pathway['id']): final_pathway}
Ejemplo n.º 16
0
    def test_get_many(self, mock_warning, mock_info):
        """
        Walk get_pathways(site) through three cache states: no id list cached,
        id list cached with one pathway's details missing, and everything cached.
        """
        created = PathwayFactory.create_batch(3)

        # Cache details for only the first 2 of the 3 pathways.
        partial_pathways = {}
        for item in created[:2]:
            partial_pathways[PATHWAY_CACHE_KEY_TPL.format(id=item['id'])] = item
        cache.set_many(partial_pathways, None)

        # No id list is cached for the site yet, so the result should be an
        # empty list and a warning should be logged.
        self.assertEqual(get_pathways(self.site), [])
        mock_warning.assert_called_once_with('Failed to get credit pathway ids from the cache.')
        mock_warning.reset_mock()

        # Cache the ids of all 3 pathways for this site.
        site_ids_key = SITE_PATHWAY_IDS_CACHE_KEY_TPL.format(domain=self.site.domain)
        cache.set(site_ids_key, [item['id'] for item in created], None)

        actual_pathways = get_pathways(self.site)

        # Only the 2 pathways with cached details come back; the third one
        # triggers the retry info message and then a warning.
        returned_ids = set(item['id'] for item in actual_pathways)
        partial_ids = set(item['id'] for item in partial_pathways.values())
        self.assertEqual(returned_ids, partial_ids)
        mock_info.assert_called_with('Failed to get details for 1 pathways. Retrying.')
        mock_warning.assert_called_with(
            'Failed to get details for credit pathway {id} from the cache.'.format(id=created[2]['id'])
        )
        mock_warning.reset_mock()

        # The pathways are dicts and therefore unhashable, so compare them one
        # by one against what was cached. The id check above already showed
        # that the right pathways were returned.
        for item in actual_pathways:
            self.assertEqual(item, partial_pathways[PATHWAY_CACHE_KEY_TPL.format(id=item['id'])])

        # Now cache details for all 3 pathways.
        all_pathways = {}
        for item in created:
            all_pathways[PATHWAY_CACHE_KEY_TPL.format(id=item['id'])] = item
        cache.set_many(all_pathways, None)

        actual_pathways = get_pathways(self.site)

        # Every pathway should be returned and nothing should be warned about.
        returned_ids = set(item['id'] for item in actual_pathways)
        all_ids = set(item['id'] for item in all_pathways.values())
        self.assertEqual(returned_ids, all_ids)
        self.assertFalse(mock_warning.called)

        for item in actual_pathways:
            self.assertEqual(item, all_pathways[PATHWAY_CACHE_KEY_TPL.format(id=item['id'])])