Esempio n. 1
0
    def test_search_from_url(self):
        """ ensure that we get the error back when the backend fails """
        searcher = SearchEngine.get_search_engine(TEST_INDEX_NAME)

        # Index two courseware documents plus one of a different doc type.
        winter_doc = {
            "id": "FAKE_ID_1",
            "content": {"text": "Little Darling, it's been a long long lonely winter"}
        }
        year_doc = {
            "id": "FAKE_ID_2",
            "content": {"text": "Little Darling, it's been a year since sun been gone"}
        }
        searcher.index("courseware_content", [winter_doc])
        searcher.index("courseware_content", [year_doc])
        searcher.index("test_doc", [{"id": "FAKE_ID_3", "content": {"text": "Here comes the sun"}}])

        # The failing backend should surface as a 5xx response with an error payload.
        code, results = post_request({"search_string": "sun"})
        self.assertGreater(code, 499)
        self.assertEqual(results["error"], 'An error occurred when searching for "sun"')

        with self.assertRaises(StandardError):
            searcher.search(query_string="test search")
Esempio n. 2
0
 def _display_name_filter(self, library, display_name):
     """
     Filter the library's children down to those whose display_name matches.

     Uses the search engine when one is configured, otherwise falls back to
     scanning the children directly via the modulestore.
     (Docstring fixed: the original said "capa type" but this filters by
     display name.)
     """
     search_engine = SearchEngine.get_search_engine(index="library_index")
     if search_engine:
         # logging's `warn` is deprecated in favour of `warning`.
         log.warning("search engine found")
         filter_clause = {
             "library": unicode(normalize_key_for_search(library.location.library_key)),
         }
         search_result = search_engine.search(field_dictionary=filter_clause)
         new_results = search_result.get('results', [])
         # display_name is not part of the search filter, so post-filter
         # the hits by looking the field up inside each result.
         results = []
         for r in new_results:
             v = self.deep_search(["display_name"], r)
             if v['display_name'] == display_name:
                 results.append(r)
         return [LibraryUsageLocator.from_string(item['data']['id']) for item in results]
     else:
         log.warning("search engine NOT found")
         # Fallback: read every child from the modulestore and compare its
         # display_name field directly.
         results = []
         for r in library.children:
             p = self.store.get_item(r, 1)
             v = {}
             for field in p.fields.values():
                 v[field.name] = field.read_json(p)
             if v.get('display_name') == display_name:
                 results.append(r)
         return results
Esempio n. 3
0
 def test_abstract_impl(self):
     """ Verify that every operation on the abstract base raises NotImplementedError """
     abstract = SearchEngine("test_index_name")
     sample = "A test string"
     # Each abstract operation must refuse to run.
     operations = (
         lambda: abstract.index([{"name": sample}]),
         lambda: abstract.search(sample),
         lambda: abstract.remove(["test_id"]),
     )
     for operation in operations:
         with self.assertRaises(NotImplementedError):
             operation()
Esempio n. 4
0
    def remove_deleted_items(cls, structure_key):
        """ Remove item from Course About Search_index """
        searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
        if not searcher:
            # No search engine configured; nothing to clean up.
            return

        location_info = cls._get_location_info(structure_key)
        response = searcher.search(field_dictionary=location_info)
        stale_ids = [hit["data"]["id"] for hit in response["results"]]
        searcher.remove(stale_ids)
Esempio n. 5
0
 def engine(cls):
     """
     Get the search engine backing course teams (if the feature is enabled).
     """
     try:
         engine = SearchEngine.get_search_engine(index=cls.INDEX_NAME)
     except ConnectionError as exc:
         # Surface backend connectivity problems as a domain-specific error.
         logging.error('Error connecting to elasticsearch: %s', exc)
         raise ElasticSearchConnectionError  # lint-amnesty, pylint: disable=raise-missing-from
     return engine
 def engine(cls):
     """
     Return the search engine used for course teams (feature-gated).
     """
     try:
         return SearchEngine.get_search_engine(index=cls.INDEX_NAME)
     except ConnectionError as connection_error:
         # Translate low-level connection failures into our own error type.
         logging.error('Error connecting to elasticsearch: %s', connection_error)
         raise ElasticSearchConnectionError
Esempio n. 7
0
 def engine(cls):
     """
     Fetch the course-team search engine, translating connection failures.
     """
     try:
         result = SearchEngine.get_search_engine(index=cls.INDEX_NAME)
     except ConnectionError as exc:
         logging.error(u'Error connecting to elasticsearch: %s', exc)
         raise ElasticSearchConnectionError
     return result
Esempio n. 8
0
 def test_search_from_url(self):
     """ ensure that we get the error back when the backend fails """
     searcher = SearchEngine.get_search_engine(TEST_INDEX_NAME)
     doc = {"id": "FAKE_ID_3", "content": {"text": "Here comes the sun"}}
     # Indexing against the broken backend should raise.
     with self.assertRaises(StandardError):
         searcher.index("courseware_content", [doc])
Esempio n. 9
0
 def remove_all_libraries(cls):
     """
     Remove all libraries from the index
     """
     searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
     # Match every library document, then delete them by id.
     matches = searcher.search(
         doc_type=cls.LIBRARY_DOCUMENT_TYPE,
         filter_dictionary={},
         size=MAX_SIZE,
     )
     doc_ids = [hit["data"]["id"] for hit in matches["results"]]
     searcher.remove(cls.LIBRARY_DOCUMENT_TYPE, doc_ids)
Esempio n. 10
0
 def _perform_elastic_search(cls, filter_terms, text_search):
     """
     Build a query and search directly on elasticsearch
     """
     searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
     query_body = cls.build_elastic_query(filter_terms, text_search)
     raw_response = searcher._es.search(  # pylint: disable=protected-access
         index=searcher.index_name,
         body=query_body,
         size=MAX_SIZE,
     )
     return _translate_hits(raw_response)
Esempio n. 11
0
 def remove_all_items(cls):
     """
     Remove all items from the index
     """
     searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
     # An empty filter matches everything of this document type.
     matches = searcher.search(
         doc_type=cls.DOCUMENT_TYPE,
         filter_dictionary={},
         size=MAX_SIZE,
     )
     doc_ids = [hit["data"]["id"] for hit in matches["results"]]
     searcher.remove(cls.DOCUMENT_TYPE, doc_ids, **cls.SEARCH_KWARGS)
Esempio n. 12
0
    def handle(self, *args, **options):
        """
        By convention set by Django developers, this method actually executes command's actions.
        So, there could be no better docstring than emphasize this once again.

        Reindexes either the explicitly listed course ids (positional args)
        or, with --all/--setup, every course in the modulestore.
        """
        # --all and --setup both trigger a full reindex of every course.
        all_option = options.get('all', False)
        setup_option = options.get('setup', False)
        index_all_courses_option = all_option or setup_option

        if len(args) == 0 and not index_all_courses_option:
            raise CommandError(
                u"reindex_course requires one or more arguments: <course_id>")

        store = modulestore()

        if index_all_courses_option:
            index_name = CoursewareSearchIndexer.INDEX_NAME
            doc_type = CoursewareSearchIndexer.DOCUMENT_TYPE
            if setup_option:
                try:
                    # try getting the ElasticSearch engine
                    searcher = SearchEngine.get_search_engine(index_name)
                except exceptions.ElasticsearchException as exc:
                    logging.exception('Search Engine error - %s', unicode(exc))
                    return

                # Probe the backend directly for index / doc-type existence.
                index_exists = searcher._es.indices.exists(index=index_name)  # pylint: disable=protected-access
                doc_type_exists = searcher._es.indices.exists_type(  # pylint: disable=protected-access
                    index=index_name,
                    doc_type=doc_type)

                index_mapping = searcher._es.indices.get_mapping(  # pylint: disable=protected-access
                    index=index_name,
                    doc_type=doc_type
                ) if index_exists and doc_type_exists else {}

                # During --setup, skip reindexing when the index and its
                # mapping are already in place.
                if index_exists and index_mapping:
                    return

            # if reindexing is done during devstack setup step, don't prompt the user
            if setup_option or query_yes_no(self.CONFIRMATION_PROMPT,
                                            default="no"):
                # in case of --setup or --all, get the list of course keys from all courses
                # that are stored in the modulestore
                course_keys = [
                    course.id for course in modulestore().get_courses()
                ]
            else:
                return
        else:
            # in case course keys are provided as arguments
            course_keys = map(self._parse_course_key, args)

        for course_key in course_keys:
            CoursewareSearchIndexer.do_course_reindex(store, course_key)
Esempio n. 13
0
def delete_course_task(user_id, course_key_string):
    """
    Delete the given course (and its groups) on behalf of a user, then
    remove its documents from the courseware search index.

    Arguments:
        user_id - primary key of the UserProfile whose user performs the delete
        course_key_string - serialized CourseKey of the course to delete
    """
    profile = UserProfile.objects.get(pk=user_id)
    user = User.objects.get(pk=profile.user_id)

    course_key = CourseKey.from_string(course_key_string)
    delete_course_and_groups(course_key, user.id)
    searcher = SearchEngine.get_search_engine(CoursewareSearchIndexer.INDEX_NAME)
    # PEP 8: compare to None with `is not`, not `!=`; also reuse the
    # already-parsed course_key instead of re-parsing the string.
    if searcher is not None:
        CoursewareSearchIndexer.remove_deleted_items(searcher, course_key, [])
        searcher.remove(CourseAboutSearchIndexer.DISCOVERY_DOCUMENT_TYPE, [course_key_string])
Esempio n. 14
0
    def handle(self, *args, **options):
        """
        By convention set by Django developers, this method actually executes command's actions.
        So, there could be no better docstring than emphasize this once again.

        Reindexes either the courses listed in --course_ids or, with
        --all/--setup, every course in the modulestore. The two modes are
        mutually exclusive.
        """
        course_ids = options['course_ids']
        all_option = options['all']
        setup_option = options['setup']
        index_all_courses_option = all_option or setup_option

        # Exactly one of "explicit ids" or "--all/--setup" must be given.
        if (not len(course_ids) and not index_all_courses_option) or \
                (len(course_ids) and index_all_courses_option):
            raise CommandError("reindex_course requires one or more <course_id>s OR the --all or --setup flags.")

        store = modulestore()

        if index_all_courses_option:
            index_name = CoursewareSearchIndexer.INDEX_NAME
            doc_type = CoursewareSearchIndexer.DOCUMENT_TYPE
            if setup_option:
                try:
                    # try getting the ElasticSearch engine
                    searcher = SearchEngine.get_search_engine(index_name)
                except exceptions.ElasticsearchException as exc:
                    logging.exception(u'Search Engine error - %s', exc)
                    return

                # Probe the backend directly for index / doc-type existence.
                index_exists = searcher._es.indices.exists(index=index_name)  # pylint: disable=protected-access
                doc_type_exists = searcher._es.indices.exists_type(  # pylint: disable=protected-access
                    index=index_name,
                    doc_type=doc_type
                )

                index_mapping = searcher._es.indices.get_mapping(  # pylint: disable=protected-access
                    index=index_name,
                    doc_type=doc_type
                ) if index_exists and doc_type_exists else {}

                # During --setup, an existing mapped index means there is
                # nothing to do.
                if index_exists and index_mapping:
                    return

            # if reindexing is done during devstack setup step, don't prompt the user
            if setup_option or query_yes_no(self.CONFIRMATION_PROMPT, default="no"):
                # in case of --setup or --all, get the list of course keys from all courses
                # that are stored in the modulestore
                course_keys = [course.id for course in modulestore().get_courses()]
            else:
                return
        else:
            # in case course keys are provided as arguments
            course_keys = map(self._parse_course_key, course_ids)

        for course_key in course_keys:
            CoursewareSearchIndexer.do_course_reindex(store, course_key)
    def test_task_indexing_course(self):
        """ Making sure that the receiver correctly fires off the task when invoked by signal """
        searcher = SearchEngine.get_search_engine(CoursewareSearchIndexer.INDEX_NAME)
        course_filter = {"course": unicode(self.course.id)}

        # Nothing indexed for the course before the publish signal fires.
        self.assertEqual(searcher.search(field_dictionary=course_filter)["total"], 0)

        listen_for_course_publish(self, self.course.id)

        # Note that this test will only succeed if celery is working in inline mode
        self.assertEqual(searcher.search(field_dictionary=course_filter)["total"], 3)
Esempio n. 16
0
    def setUp(self):
        """
        Create three teams in one course and grab the team search engine.
        """
        super(ReindexCourseTeamTest, self).setUp()

        # Three teams, all in the same course.
        self.team1, self.team2, self.team3 = [
            CourseTeamFactory(course_id=COURSE_KEY1, team_id='team{}'.format(number))
            for number in (1, 2, 3)
        ]

        self.search_engine = SearchEngine.get_search_engine(index='index_course_team')
def index_course_programs(course_id):
    """
    Reindex the discovery documents for every program that contains the
    given course.

    Returns True when all programs were indexed, a JsonResponse describing
    the failure when indexing errors out or a program's start date is in
    the future, or None when no search engine is configured.
    """
    INDEX_NAME = "courseware_index"
    searcher = SearchEngine.get_search_engine(INDEX_NAME)
    if not searcher:
        return
    course_key = CourseKey.from_string(course_id)
    # All programs that include this course.
    programs = Program.objects.filter(courses__course_key=course_key)
    for program in programs:
        # Fall back to a static banner when the program has no image.
        program_banner_image_url = program.banner_image.url if program.banner_image else '/static/images/banner-black-background.jpg'
        # NOTE(review): compares program.start against today's UTC date —
        # presumably program.start is a date, not a datetime; confirm.
        if program.start <= datetime.now(pytz.UTC).date():
            program_info = {
                'id': program.id,
                'course': program.id,
                'content': {
                    'display_name': program.name,
                    'overview': program.short_description
                },
                'image_url': program_banner_image_url,
                'start': program.start,
                'language': program.language.code,
                'subject': program.subject.name,
                'is_program': True,
            }
            # Broad exception handler to protect around and report problems with indexing
            try:
                searcher.index('course_info', [program_info])
            except:  # pylint: disable=bare-except
                log.exception(
                    "Program discovery indexing error encountered %s",
                    program_info.get('id', ''),
                )
                return JsonResponse({
                    'success': False,
                    'error': 'Program discovery indexing error encountered ',
                    'Program': program.name
                })
        else:
            # A future start date aborts the whole run with an error payload.
            return JsonResponse({
                'success':
                False,
                'error':
                'This ' + program.name + ' program start date is in future: ' +
                str(program.start),
                'suggestion':
                'Please set past progrm start date',
                'Program':
                program.name
            })
    return True
    def test_task_library_update(self):
        """ Making sure that the receiver correctly fires off the task when invoked by signal """
        searcher = SearchEngine.get_search_engine(LibrarySearchIndexer.INDEX_NAME)
        library_search_key = unicode(normalize_key_for_search(self.library.location.library_key))
        library_filter = {"library": library_search_key}

        # The library starts out with nothing indexed.
        self.assertEqual(searcher.search(field_dictionary=library_filter)["total"], 0)

        listen_for_library_update(self, self.library.location.library_key)

        # Note that this test will only succeed if celery is working in inline mode
        self.assertEqual(searcher.search(field_dictionary=library_filter)["total"], 2)
Esempio n. 19
0
 def test_abstract_impl(self):
     """ Verify the abstract base raises NotImplementedError for every operation """
     abstract = SearchEngine("test_index_name")
     sample = "A test string"
     calls = (
         lambda: abstract.index("test_doc", [{"name": sample}]),
         lambda: abstract.search(sample),
         lambda: abstract.remove("test_doc", ["test_id"]),
     )
     for call in calls:
         with self.assertRaises(NotImplementedError):
             call()
Esempio n. 20
0
    def remove_deleted_items(cls, structure_key):
        """ Remove item from Course About Search_index """
        searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
        if not searcher:
            # Search is not configured; nothing to remove.
            return

        hits = searcher.search(
            doc_type=cls.DISCOVERY_DOCUMENT_TYPE,
            field_dictionary=cls._get_location_info(structure_key)
        )
        stale_ids = [hit["data"]["id"] for hit in hits["results"]]
        searcher.remove(cls.DISCOVERY_DOCUMENT_TYPE, stale_ids)
    def setUp(self):
        """
        Build team fixtures and acquire the course-team search engine.
        """
        super(ReindexCourseTeamTest, self).setUp()  # lint-amnesty, pylint: disable=super-with-arguments

        # Three teams in the same course, bound to self.team1..self.team3.
        for number in (1, 2, 3):
            setattr(
                self,
                'team{}'.format(number),
                CourseTeamFactory(course_id=COURSE_KEY1, team_id='team{}'.format(number)),
            )

        self.search_engine = SearchEngine.get_search_engine(
            index='index_course_team')
def index_programs_information(request):
    """
    Add every program to the course discovery index.

    Redirects to the programs listing on success; returns a JsonResponse
    describing the failure when indexing errors out or a program's start
    date is in the future, or None when no search engine is configured.
    """
    INDEX_NAME = "courseware_index"
    searcher = SearchEngine.get_search_engine(INDEX_NAME)
    if not searcher:
        return

    programs = Program.objects.all()
    for program in programs:
        # NOTE(review): compares program.start against today's UTC date —
        # presumably program.start is a date, not a datetime; confirm.
        if program.start <= datetime.now(pytz.UTC).date():
            program_info = {
                'id': program.id,
                'course': program.id,
                'content': {
                    'display_name': program.name,
                    'overview': program.short_description
                },
                # NOTE(review): unlike index_course_programs elsewhere, this
                # assumes banner_image is set — a missing image would raise.
                'image_url': program.banner_image.url,
                'start': program.start,
                'language': program.language.code,
                'subject': program.subject.name,
                'is_program': True,
            }
            # Broad exception handler to protect around and report problems with indexing
            try:
                searcher.index('course_info', [program_info])
            except:  # pylint: disable=bare-except
                log.exception(
                    "Program discovery indexing error encountered %s",
                    program_info.get('id', ''),
                )
                return JsonResponse({
                    'success': False,
                    'error': 'Program discovery indexing error encountered ',
                    'Program': program.name
                })
        else:
            # A future start date aborts the whole run with an error payload.
            return JsonResponse({
                'success':
                False,
                'error':
                'This ' + program.name + ' program start date is in future: ' +
                str(program.start),
                'suggestion':
                'Please set past progrm start date',
                'Program':
                program.name
            })

    return redirect(reverse('show-programs'))
Esempio n. 23
0
    def test_task_library_update(self):
        """ Making sure that the receiver correctly fires off the task when invoked by signal """
        searcher = SearchEngine.get_search_engine(LibrarySearchIndexer.INDEX_NAME)
        search_key = unicode(normalize_key_for_search(self.library.location.library_key))

        # Index is empty before the update signal.
        before = searcher.search(field_dictionary={"library": search_key})
        self.assertEqual(before["total"], 0)

        listen_for_library_update(self, self.library.location.library_key)

        # Note that this test will only succeed if celery is working in inline mode
        after = searcher.search(field_dictionary={"library": search_key})
        self.assertEqual(after["total"], 2)
Esempio n. 24
0
 def _problem_type_filter(self, library, capa_type):
     """ Filters library children by capa type"""
     search_engine = SearchEngine.get_search_engine(index="library_index")
     if not search_engine:
         # No search backend: inspect each child directly.
         return [key for key in library.children if self._filter_child(key, capa_type)]

     filter_clause = {
         "library": six.text_type(normalize_key_for_search(library.location.library_key)),
         "content_type": ProblemBlock.INDEX_CONTENT_TYPE,
         "problem_types": capa_type
     }
     hits = search_engine.search(field_dictionary=filter_clause).get('results', [])
     return [LibraryUsageLocator.from_string(hit['data']['id']) for hit in hits]
Esempio n. 25
0
    def handle(self, *args, **options):
        """
        By convention set by Django developers, this method actually executes command's actions.
        So, there could be no better docstring than emphasize this once again.

        Reindexes either the courses in --course_ids or, with --all/--setup,
        every course in the modulestore; the two modes are mutually
        exclusive. Per-course indexing errors are logged, not fatal.
        """
        course_ids = options['course_ids']
        all_option = options['all']
        setup_option = options['setup']
        index_all_courses_option = all_option or setup_option

        # Exactly one of "explicit ids" or "--all/--setup" must be given.
        if (not len(course_ids) and not index_all_courses_option) or (len(course_ids) and index_all_courses_option):  # lint-amnesty, pylint: disable=len-as-condition
            raise CommandError("reindex_course requires one or more <course_id>s OR the --all or --setup flags.")

        store = modulestore()

        if index_all_courses_option:
            index_names = (CoursewareSearchIndexer.INDEX_NAME, CourseAboutSearchIndexer.INDEX_NAME)
            if setup_option:
                for index_name in index_names:
                    try:
                        # try getting the ElasticSearch engine
                        searcher = SearchEngine.get_search_engine(index_name)
                    except exceptions.ElasticsearchException as exc:
                        logging.exception('Search Engine error - %s', exc)
                        return

                    index_exists = searcher._es.indices.exists(index=index_name)  # pylint: disable=protected-access

                    index_mapping = searcher._es.indices.get_mapping(  # pylint: disable=protected-access
                        index=index_name,
                    ) if index_exists else {}

                    # During --setup, an existing mapped index means there is
                    # nothing to do.
                    if index_exists and index_mapping:
                        return

            # if reindexing is done during devstack setup step, don't prompt the user
            if setup_option or query_yes_no(self.CONFIRMATION_PROMPT, default="no"):
                # in case of --setup or --all, get the list of course keys from all courses
                # that are stored in the modulestore
                course_keys = [course.id for course in modulestore().get_courses()]
            else:
                return
        else:
            # in case course keys are provided as arguments
            course_keys = list(map(self._parse_course_key, course_ids))

        for course_key in course_keys:
            try:
                CoursewareSearchIndexer.do_course_reindex(store, course_key)
            except Exception as exc:  # lint-amnesty, pylint: disable=broad-except
                # One bad course must not abort the whole reindex run.
                logging.exception('Error indexing course %s due to the error: %s', course_key, exc)
    def test_task_indexing_course(self):
        """ Making sure that the receiver correctly fires off the task when invoked by signal """
        searcher = SearchEngine.get_search_engine(
            CoursewareSearchIndexer.INDEX_NAME)
        query = {"course": unicode(self.course.id)}

        # No documents before the publish signal fires.
        self.assertEqual(searcher.search(field_dictionary=query)["total"], 0)

        listen_for_course_publish(self, self.course.id)

        # Note that this test will only succeed if celery is working in inline mode
        self.assertEqual(searcher.search(field_dictionary=query)["total"], 3)
Esempio n. 27
0
 def _problem_type_filter(self, library, capa_type):
     """ Filters library children by capa type"""
     search_engine = SearchEngine.get_search_engine(index="library_index")
     if search_engine:
         library_key_str = unicode(normalize_key_for_search(library.location.library_key))
         matches = search_engine.search(field_dictionary={
             "library": library_key_str,
             "content_type": CapaDescriptor.INDEX_CONTENT_TYPE,
             "problem_types": capa_type
         }).get('results', [])
         return [LibraryUsageLocator.from_string(match['data']['id']) for match in matches]
     # Without a search engine, inspect each child directly.
     return [key for key in library.children if self._filter_child(key, capa_type)]
def reindex_specific_program(request, pk):
    """
    Reindex a single program into the course discovery index.

    Arguments:
        request - the incoming HTTP request
        pk - primary key of the Program to reindex
    """
    INDEX_NAME = "courseware_index"
    searcher = SearchEngine.get_search_engine(INDEX_NAME)
    if not searcher:
        # Search is not configured; nothing to reindex.
        return

    try:
        program = Program.objects.get(pk=pk)
    except Exception:  # pylint: disable=broad-except
        # Fixed: `except Exception, e` is Python-2-only syntax (and `e` was
        # unused); also coerce pk with str() so the message builds even when
        # pk arrives as an int.
        return JsonResponse({
            'success': False,
            'error': 'Program not found in this id ' + str(pk),
        })
Esempio n. 29
0
def delete_temp_user_task(request, user_id):
    """
    Delete every course and library accessible to the given user, cleaning
    up the courseware search index for each deleted course.

    Arguments:
        request - the incoming HTTP request (used for course access checks)
        user_id - primary key of the UserProfile whose user is being removed
    """
    profile = UserProfile.objects.get(pk=user_id)
    user = User.objects.get(pk=profile.user_id)

    courses = [format_course_for_view(c) for c in get_courses_accessible_to_user(request, user)[0]]
    libraries = [format_library_for_view(lib, user) for lib in accessible_libraries_list(user)]

    # The search engine does not vary per course; fetch it once, outside the loop.
    searcher = SearchEngine.get_search_engine(CoursewareSearchIndexer.INDEX_NAME)

    for course in courses:
        # Fixed: the original loop body referenced an undefined
        # `course_key_string` (NameError); derive it from the course dict.
        course_key_string = course["course_key"]
        course_key = CourseKey.from_string(course_key_string)
        delete_course_and_groups(course_key, user.id)
        if searcher is not None:
            CoursewareSearchIndexer.remove_deleted_items(searcher, course_key, [])
            searcher.remove(CourseAboutSearchIndexer.DISCOVERY_DOCUMENT_TYPE, [course_key_string])

    for library in libraries:
        library_key = CourseKey.from_string(library['library_key'])
        delete_course_and_groups(library_key, user.id)
Esempio n. 30
0
    def index_libraries(cls, library_keys):
        """
        Index the specified libraries. If they already exist, replace them with new ones.

        Arguments:
            library_keys - iterable of library keys to (re)index.

        Returns whatever the search engine's index() call returns.
        """
        searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)

        library_dicts = []

        for library_key in library_keys:
            # Gather bundle metadata for this library's draft state.
            ref = ContentLibrary.objects.get_by_key(library_key)
            lib_bundle = LibraryBundle(library_key,
                                       ref.bundle_uuid,
                                       draft_name=DRAFT_NAME)
            num_blocks = len(lib_bundle.get_top_level_usages())
            last_published = lib_bundle.get_last_published_time()
            last_published_str = None
            if last_published:
                # ISO-8601 UTC timestamp for the index.
                last_published_str = last_published.strftime(
                    '%Y-%m-%dT%H:%M:%SZ')
            (has_unpublished_changes,
             has_unpublished_deletes) = lib_bundle.has_changes()

            bundle_metadata = get_bundle(ref.bundle_uuid)

            # NOTE: Increment ContentLibraryIndexer.SCHEMA_VERSION if the following schema is updated to avoid dealing
            # with outdated indexes which might cause errors due to missing/invalid attributes.
            library_dict = {
                "schema_version": ContentLibraryIndexer.SCHEMA_VERSION,
                "id": str(library_key),
                "uuid": str(bundle_metadata.uuid),
                "title": bundle_metadata.title,
                "description": bundle_metadata.description,
                "num_blocks": num_blocks,
                "version": bundle_metadata.latest_version,
                "last_published": last_published_str,
                "has_unpublished_changes": has_unpublished_changes,
                "has_unpublished_deletes": has_unpublished_deletes,
            }
            library_dicts.append(library_dict)

        # Index all collected documents in a single call.
        return searcher.index(cls.LIBRARY_DOCUMENT_TYPE, library_dicts)
Esempio n. 31
0
    def get_items(cls, ids=None, filter_terms=None, text_search=None):
        """
        Retrieve a list of items from the index.
        Arguments:
            ids - List of ids to be searched for in the index
            filter_terms - Dictionary of filters to be applied
            text_search - String which is used to do a text search in the supported indexes.
        """
        if filter_terms is None:
            filter_terms = {}
        if ids is not None:
            # Restrict to the requested ids at the current schema version;
            # caller-supplied filter_terms win on key collisions.
            base_filters = {
                "id": [str(item) for item in ids],
                "schema_version": [cls.SCHEMA_VERSION],
            }
            base_filters.update(filter_terms)
            filter_terms = base_filters

        if text_search:
            response = cls._perform_elastic_search(filter_terms, text_search)
        else:
            engine = SearchEngine.get_search_engine(cls.INDEX_NAME)
            response = engine.search(field_dictionary=filter_terms, size=MAX_SIZE)

        items = [hit["data"] for hit in response["results"]]
        return sorted(items, key=lambda item: item["id"])
Esempio n. 32
0
    def add_to_search_index(modulestore, location, delete=False, raise_on_error=False):
        """
        Add to courseware search index from given location and its children

        Arguments:
            modulestore - store used to fetch the item(s) at `location`
            location - CourseLocator or usage location whose subtree is indexed
            delete - when True, remove the subtree from the index instead
            raise_on_error - when True, raise SearchIndexingError if any
                per-item errors were collected
        """
        error_list = []
        # TODO - inline for now, need to move this out to a celery task
        searcher = SearchEngine.get_search_engine(INDEX_NAME)
        if not searcher:
            return

        # Normalize to the course key: either the location is the course
        # itself, or it belongs to one.
        if isinstance(location, CourseLocator):
            course_key = location
        else:
            course_key = location.course_key

        location_info = {
            "course": unicode(course_key),
        }

        def _fetch_item(item_location):
            """ Fetch the item from the modulestore location, log if not found, but continue """
            try:
                if isinstance(item_location, CourseLocator):
                    item = modulestore.get_course(item_location)
                else:
                    item = modulestore.get_item(item_location, revision=ModuleStoreEnum.RevisionOption.published_only)
            except ItemNotFoundError:
                log.warning('Cannot find: %s', item_location)
                return None

            return item

        def index_item_location(item_location, current_start_date):
            """ add this item to the search index, recursing into its children """
            item = _fetch_item(item_location)
            if not item:
                return

            is_indexable = hasattr(item, "index_dictionary")
            # if it's not indexable and it does not have children, then ignore
            if not is_indexable and not item.has_children:
                return

            # if it has a defined start, then apply it and to it's children
            if item.start and (not current_start_date or item.start > current_start_date):
                current_start_date = item.start

            if item.has_children:
                for child_loc in item.children:
                    index_item_location(child_loc, current_start_date)

            item_index = {}
            item_index_dictionary = item.index_dictionary() if is_indexable else None

            # if it has something to add to the index, then add it
            if item_index_dictionary:
                try:
                    item_index.update(location_info)
                    item_index.update(item_index_dictionary)
                    item_index['id'] = unicode(item.scope_ids.usage_id)
                    if current_start_date:
                        # Latest start date seen on the path from the root.
                        item_index['start_date'] = current_start_date

                    searcher.index(DOCUMENT_TYPE, item_index)
                except Exception as err:  # pylint: disable=broad-except
                    # broad exception so that index operation does not fail on one item of many
                    log.warning('Could not index item: %s - %s', item_location, unicode(err))
                    error_list.append(_('Could not index item: {}').format(item_location))

        def remove_index_item_location(item_location):
            """ remove this item (and its children) from the search index """
            item = _fetch_item(item_location)
            if item:
                if item.has_children:
                    for child_loc in item.children:
                        remove_index_item_location(child_loc)

                searcher.remove(DOCUMENT_TYPE, unicode(item.scope_ids.usage_id))

        try:
            if delete:
                remove_index_item_location(location)
            else:
                index_item_location(location, None)
        except Exception as err:  # pylint: disable=broad-except
            # broad exception so that index operation does not prevent the rest of the application from working
            log.exception(
                "Indexing error encountered, courseware index may be out of date %s - %s",
                course_key,
                unicode(err)
            )
            error_list.append(_('General indexing error occurred'))

        if raise_on_error and error_list:
            raise SearchIndexingError(_('Error(s) present during indexing'), error_list)
Esempio n. 33
0
 def searcher(self):
     """ Lazily create the search engine on first access and memoize it. """
     engine = self._searcher
     if engine is None:
         engine = SearchEngine.get_search_engine(TEST_INDEX_NAME)
         self._searcher = engine
     return engine
Esempio n. 34
0
    def index_about_information(cls, modulestore, course):
        """
        Add the given course to the course discovery index

        Arguments:
        modulestore - modulestore object to use for operations

        course - course object from which to take properties, locate about information

        Raises:
        Re-raises whatever the search engine raises if the final bulk index
        call fails (the error is logged first).
        """
        searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
        if not searcher:
            # Search is not enabled/configured - nothing to do.
            return

        course_id = unicode(course.id)
        course_info = {
            'id': course_id,
            'course': course_id,
            'content': {},
            'image_url': course_image_url(course),
        }

        # load data for all of the 'about' modules for this course into a dictionary
        about_dictionary = {
            item.location.name: item.data
            for item in modulestore.get_items(course.id,
                                              qualifiers={"category": "about"})
        }

        about_context = {
            "course": course,
            "about_dictionary": about_dictionary,
        }

        for about_information in cls.ABOUT_INFORMATION_TO_INCLUDE:
            # Broad exception handler so that a single bad property does not scupper
            # the collection of others. `except Exception` (not a bare `except`) is
            # used so that system-exiting exceptions such as KeyboardInterrupt and
            # SystemExit still propagate instead of being swallowed.
            try:
                section_content = about_information.get_value(**about_context)
            except Exception:  # pylint: disable=broad-except
                section_content = None
                log.warning(
                    "Course discovery could not collect property %s for course %s",
                    about_information.property_name,
                    course_id,
                    exc_info=True,
                )

            if section_content:
                if about_information.index_flags & AboutInfo.ANALYSE:
                    # Searchable content: strip markup so only plain text is analysed.
                    analyse_content = section_content
                    if isinstance(section_content, basestring):
                        analyse_content = strip_html_content_to_text(
                            section_content)
                    course_info['content'][
                        about_information.property_name] = analyse_content
                if about_information.index_flags & AboutInfo.PROPERTY:
                    # Stored as a top-level property, returned verbatim with results.
                    course_info[
                        about_information.property_name] = section_content

        # Broad exception handler to protect around and report problems with indexing;
        # the failure is logged and then re-raised for the caller to handle.
        try:
            searcher.index(cls.DISCOVERY_DOCUMENT_TYPE, [course_info])
        except Exception:  # pylint: disable=broad-except
            log.exception(
                "Course discovery indexing error encountered, course discovery index may be out of date %s",
                course_id,
            )
            raise

        log.debug("Successfully added %s course to the course discovery index",
                  course_id)
Esempio n. 35
0
    def index(cls,
              modulestore,
              structure_key,
              triggered_at=None,
              reindex_age=REINDEX_AGE):
        """
        Process course for indexing

        Arguments:
        modulestore - modulestore object to use for operations

        structure_key (CourseKey|LibraryKey) - course or library identifier

        triggered_at (datetime) - provides time at which indexing was triggered;
            useful for index updates - only things changed recently from that date
            (within REINDEX_AGE above ^^) will have their index updated, others skip
            updating their index but are still walked through in order to identify
            which items may need to be removed from the index
            If None, then a full reindex takes place

        Returns:
        Number of items that have been added to the index

        Raises:
        SearchIndexingError - if any item failed to index or a general
            indexing error occurred
        """
        error_list = []
        searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
        if not searcher:
            # Search is not enabled/configured - nothing to do.
            return

        structure_key = cls.normalize_structure_key(structure_key)
        location_info = cls._get_location_info(structure_key)

        # Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `prepare_item_index`
        indexed_count = {"count": 0}

        # indexed_items is a list of all the items that we wish to remain in the
        # index, whether or not we are planning to actually update their index.
        # This is used in order to build a query to remove those items not in this
        # list - those are ready to be destroyed
        indexed_items = set()

        # items_index is a list of all the items index dictionaries.
        # it is used to collect all indexes and index them using bulk API,
        # instead of per item index API call.
        items_index = []

        def get_item_location(item):
            """
            Gets the version agnostic item location
            """
            return item.location.version_agnostic().replace(branch=None)

        def prepare_item_index(item, skip_index=False, groups_usage_info=None):
            """
            Add this item to the items_index and indexed_items list

            Arguments:
            item - item to add to index, its children will be processed recursively

            skip_index - simply walk the children in the tree, the content change is
                older than the REINDEX_AGE window and would have been already indexed.
                This should really only be passed from the recursive child calls when
                this method has determined that it is safe to do so

            Returns:
            item_content_groups - content groups assigned to indexed item
            """
            is_indexable = hasattr(item, "index_dictionary")
            item_index_dictionary = item.index_dictionary(
            ) if is_indexable else None
            # if it's not indexable and it does not have children, then ignore
            if not item_index_dictionary and not item.has_children:
                return

            item_content_groups = None

            # For split_test blocks, record which content group each child
            # (and that child's components) belongs to, so search results can
            # later be filtered by the user's group membership.
            if item.category == "split_test":
                split_partition = item.get_selected_partition()
                for split_test_child in item.get_children():
                    if split_partition:
                        for group in split_partition.groups:
                            group_id = unicode(group.id)
                            child_location = item.group_id_to_child.get(
                                group_id, None)
                            if child_location == split_test_child.location:
                                groups_usage_info.update({
                                    unicode(get_item_location(split_test_child)):
                                    [group_id],
                                })
                                for component in split_test_child.get_children(
                                ):
                                    groups_usage_info.update({
                                        unicode(get_item_location(component)):
                                        [group_id]
                                    })

            if groups_usage_info:
                item_location = get_item_location(item)
                item_content_groups = groups_usage_info.get(
                    unicode(item_location), None)

            item_id = unicode(cls._id_modifier(item.scope_ids.usage_id))
            # Record the id even if we end up skipping the re-index, so this
            # item is not purged by remove_deleted_items below.
            indexed_items.add(item_id)
            if item.has_children:
                # determine if it's okay to skip adding the children herein based upon how recently any may have changed
                skip_child_index = skip_index or \
                    (triggered_at is not None and (triggered_at - item.subtree_edited_on) > reindex_age)
                children_groups_usage = []
                for child_item in item.get_children():
                    if modulestore.has_published_version(child_item):
                        children_groups_usage.append(
                            prepare_item_index(
                                child_item,
                                skip_index=skip_child_index,
                                groups_usage_info=groups_usage_info))
                # If any child is visible to all groups (None), treat the
                # parent as visible to all groups as well.
                if None in children_groups_usage:
                    item_content_groups = None

            if skip_index or not item_index_dictionary:
                return

            item_index = {}
            # if it has something to add to the index, then add it
            try:
                item_index.update(location_info)
                item_index.update(item_index_dictionary)
                item_index['id'] = item_id
                if item.start:
                    item_index['start_date'] = item.start
                item_index[
                    'content_groups'] = item_content_groups if item_content_groups else None
                item_index.update(cls.supplemental_fields(item))
                items_index.append(item_index)
                indexed_count["count"] += 1
                return item_content_groups
            except Exception as err:  # pylint: disable=broad-except
                # broad exception so that index operation does not fail on one item of many
                log.warning('Could not index item: %s - %r', item.location,
                            err)
                error_list.append(
                    _('Could not index item: {}').format(item.location))

        try:
            with modulestore.branch_setting(
                    ModuleStoreEnum.RevisionOption.published_only):
                structure = cls._fetch_top_level(modulestore, structure_key)
                groups_usage_info = cls.fetch_group_usage(
                    modulestore, structure)

                # First perform any additional indexing from the structure object
                cls.supplemental_index_information(modulestore, structure)

                # Now index the content
                for item in structure.get_children():
                    prepare_item_index(item,
                                       groups_usage_info=groups_usage_info)
                # Bulk-index everything collected, then purge index entries
                # that no longer correspond to an item in the structure.
                searcher.index(cls.DOCUMENT_TYPE, items_index)
                cls.remove_deleted_items(searcher, structure_key,
                                         indexed_items)
        except Exception as err:  # pylint: disable=broad-except
            # broad exception so that index operation does not prevent the rest of the application from working
            log.exception(
                "Indexing error encountered, courseware index may be out of date %s - %r",
                structure_key, err)
            error_list.append(_('General indexing error occurred'))

        if error_list:
            raise SearchIndexingError('Error(s) present during indexing',
                                      error_list)

        return indexed_count["count"]
Esempio n. 36
0
 def searcher(self):
     """ Return the memoized search engine, creating it on first use. """
     if self._searcher is None:
         engine = SearchEngine.get_search_engine(TEST_INDEX_NAME)
         self._searcher = engine
     return self._searcher
Esempio n. 37
0
 def mock_perform(cls, filter_terms, text_search):
     """ Run the given field filters and query string against the class's search index. """
     # pylint: disable=no-member
     engine = SearchEngine.get_search_engine(cls.INDEX_NAME)
     return engine.search(field_dictionary=filter_terms,
                          query_string=text_search,
                          size=MAX_SIZE)
Esempio n. 38
0
 def searcher(self):
     """ Single point of access to the search engine used by the test. """
     index_name = self.INDEX_NAME
     return SearchEngine.get_search_engine(index_name)
Esempio n. 39
0
 def setUp(self):
     """ Reset the content library index and cache an engine handle for the test. """
     super().setUp()
     # Start each test from an empty index so documents left behind by
     # earlier tests cannot leak into this test's assertions.
     ContentLibraryIndexer.remove_all_libraries()
     self.searcher = SearchEngine.get_search_engine(ContentLibraryIndexer.INDEX_NAME)
Esempio n. 40
0
 def setUp(self):
     """ Clear both library indexes and cache an engine handle for the test. """
     super().setUp()
     # Remove all previously indexed documents from both indexers so each
     # test starts against a clean index.
     ContentLibraryIndexer.remove_all_items()
     LibraryBlockIndexer.remove_all_items()
     self.searcher = SearchEngine.get_search_engine(
         LibraryBlockIndexer.INDEX_NAME)
Esempio n. 41
0
    def add_to_search_index(modulestore,
                            location,
                            delete=False,
                            raise_on_error=False):
        """
        Add to courseware search index from given location and its children

        Arguments:
        modulestore - modulestore object used to fetch the item(s) to (re)index
        location - CourseLocator or usage key at which to start the recursive walk
        delete - when True, remove the location (and its children) from the index
            instead of adding it
        raise_on_error - when True, raise SearchIndexingError if any item failed;
            otherwise failures are only logged and collected
        """
        error_list = []
        # TODO - inline for now, need to move this out to a celery task
        searcher = SearchEngine.get_search_engine(INDEX_NAME)
        if not searcher:
            # Search is not enabled/configured - nothing to do.
            return

        # A CourseLocator is already a course key; anything else is a usage
        # key from which the course key can be derived.
        if isinstance(location, CourseLocator):
            course_key = location
        else:
            course_key = location.course_key

        # Common fields merged into every indexed document.
        location_info = {
            "course": unicode(course_key),
        }

        def _fetch_item(item_location):
            """ Fetch the item from the modulestore location, log if not found, but continue """
            try:
                if isinstance(item_location, CourseLocator):
                    item = modulestore.get_course(item_location)
                else:
                    # Only published content is indexed.
                    item = modulestore.get_item(
                        item_location,
                        revision=ModuleStoreEnum.RevisionOption.published_only)
            except ItemNotFoundError:
                log.warning('Cannot find: %s', item_location)
                return None

            return item

        def index_item_location(item_location, current_start_date):
            """ add this item to the search index """
            item = _fetch_item(item_location)
            if not item:
                return

            is_indexable = hasattr(item, "index_dictionary")
            # if it's not indexable and it does not have children, then ignore
            if not is_indexable and not item.has_children:
                return

            # if it has a defined start, then apply it and to it's children
            # (children inherit the latest start date seen on the path down)
            if item.start and (not current_start_date
                               or item.start > current_start_date):
                current_start_date = item.start

            # Recurse into children first so the whole subtree is indexed.
            if item.has_children:
                for child_loc in item.children:
                    index_item_location(child_loc, current_start_date)

            item_index = {}
            item_index_dictionary = item.index_dictionary(
            ) if is_indexable else None

            # if it has something to add to the index, then add it
            if item_index_dictionary:
                try:
                    item_index.update(location_info)
                    item_index.update(item_index_dictionary)
                    item_index['id'] = unicode(item.scope_ids.usage_id)
                    if current_start_date:
                        item_index['start_date'] = current_start_date

                    searcher.index(DOCUMENT_TYPE, item_index)
                except Exception as err:  # pylint: disable=broad-except
                    # broad exception so that index operation does not fail on one item of many
                    log.warning('Could not index item: %s - %s', item_location,
                                unicode(err))
                    error_list.append(
                        _('Could not index item: {}').format(item_location))

        def remove_index_item_location(item_location):
            """ remove this item from the search index """
            item = _fetch_item(item_location)
            if item:
                # Remove the children first, then the item itself.
                if item.has_children:
                    for child_loc in item.children:
                        remove_index_item_location(child_loc)

                searcher.remove(DOCUMENT_TYPE,
                                unicode(item.scope_ids.usage_id))

        try:
            if delete:
                remove_index_item_location(location)
            else:
                index_item_location(location, None)
        except Exception as err:  # pylint: disable=broad-except
            # broad exception so that index operation does not prevent the rest of the application from working
            log.exception(
                "Indexing error encountered, courseware index may be out of date %s - %s",
                course_key, unicode(err))
            error_list.append(_('General indexing error occurred'))

        if raise_on_error and error_list:
            raise SearchIndexingError(_('Error(s) present during indexing'),
                                      error_list)
Esempio n. 42
0
 def engine(cls):
     """
     Return the course team search engine when the feature is enabled,
     otherwise None.
     """
     if not cls.search_is_enabled():
         return None
     return SearchEngine.get_search_engine(index=cls.INDEX_NAME)
Esempio n. 43
0
    def index(cls,
              modulestore,
              structure_key,
              triggered_at=None,
              reindex_age=REINDEX_AGE):
        """
        Process course for indexing

        Arguments:
        modulestore - modulestore object to use for operations

        structure_key (CourseKey|LibraryKey) - course or library identifier

        triggered_at (datetime) - provides time at which indexing was triggered;
            useful for index updates - only things changed recently from that date
            (within REINDEX_AGE above ^^) will have their index updated, others skip
            updating their index but are still walked through in order to identify
            which items may need to be removed from the index
            If None, then a full reindex takes place

        Returns:
        Number of items that have been added to the index

        Raises:
        SearchIndexingError - if any item failed to index or a general
            indexing error occurred
        """
        error_list = []
        searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
        if not searcher:
            # Search is not enabled/configured - nothing to do.
            return

        structure_key = cls.normalize_structure_key(structure_key)
        location_info = cls._get_location_info(structure_key)

        # Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `index_item`
        indexed_count = {"count": 0}

        # indexed_items is a list of all the items that we wish to remain in the
        # index, whether or not we are planning to actually update their index.
        # This is used in order to build a query to remove those items not in this
        # list - those are ready to be destroyed
        indexed_items = set()

        def index_item(item, skip_index=False):
            """
            Add this item to the search index and indexed_items list

            Arguments:
            item - item to add to index, its children will be processed recursively

            skip_index - simply walk the children in the tree, the content change is
                older than the REINDEX_AGE window and would have been already indexed.
                This should really only be passed from the recursive child calls when
                this method has determined that it is safe to do so
            """
            is_indexable = hasattr(item, "index_dictionary")
            item_index_dictionary = item.index_dictionary(
            ) if is_indexable else None
            # if it's not indexable and it does not have children, then ignore
            if not item_index_dictionary and not item.has_children:
                return

            item_id = unicode(cls._id_modifier(item.scope_ids.usage_id))
            # Record the id even if we end up skipping the re-index, so this
            # item is not purged by remove_deleted_items below.
            indexed_items.add(item_id)
            if item.has_children:
                # determine if it's okay to skip adding the children herein based upon how recently any may have changed
                skip_child_index = skip_index or \
                    (triggered_at is not None and (triggered_at - item.subtree_edited_on) > reindex_age)
                for child_item in item.get_children():
                    index_item(child_item, skip_index=skip_child_index)

            if skip_index or not item_index_dictionary:
                return

            item_index = {}
            # if it has something to add to the index, then add it
            try:
                item_index.update(location_info)
                item_index.update(item_index_dictionary)
                item_index['id'] = item_id
                if item.start:
                    item_index['start_date'] = item.start

                searcher.index(cls.DOCUMENT_TYPE, item_index)
                indexed_count["count"] += 1
            except Exception as err:  # pylint: disable=broad-except
                # broad exception so that index operation does not fail on one item of many
                log.warning('Could not index item: %s - %r', item.location,
                            err)
                error_list.append(
                    _('Could not index item: {}').format(item.location))

        try:
            with modulestore.branch_setting(
                    ModuleStoreEnum.RevisionOption.published_only):
                structure = cls._fetch_top_level(modulestore, structure_key)
                for item in structure.get_children():
                    index_item(item)
                # Purge index entries that no longer correspond to an item
                # encountered during the walk above.
                cls.remove_deleted_items(searcher, structure_key,
                                         indexed_items)
        except Exception as err:  # pylint: disable=broad-except
            # broad exception so that index operation does not prevent the rest of the application from working
            log.exception(
                "Indexing error encountered, courseware index may be out of date %s - %r",
                structure_key, err)
            error_list.append(_('General indexing error occurred'))

        if error_list:
            raise SearchIndexingError('Error(s) present during indexing',
                                      error_list)

        return indexed_count["count"]
Esempio n. 44
0
 def test_search_from_url(self):
     """ ensure that we get the error back when the backend fails """
     engine = SearchEngine.get_search_engine(TEST_INDEX_NAME)
     document = {"id": "FAKE_ID_3", "content": {"text": "Here comes the sun"}}
     with self.assertRaises(StandardError):
         engine.index("courseware_content", [document])
 def searcher(self):
     """ Centralized access to the search engine used by the test. """
     return SearchEngine.get_search_engine(
         self.INDEX_NAME)
Esempio n. 46
0
    def index(cls, modulestore, structure_key, triggered_at=None, reindex_age=REINDEX_AGE):
        """
        Process course for indexing

        Arguments:
        modulestore - modulestore object to use for operations

        structure_key (CourseKey|LibraryKey) - course or library identifier

        triggered_at (datetime) - provides time at which indexing was triggered;
            useful for index updates - only things changed recently from that date
            (within REINDEX_AGE above ^^) will have their index updated, others skip
            updating their index but are still walked through in order to identify
            which items may need to be removed from the index
            If None, then a full reindex takes place

        Returns:
        Number of items that have been added to the index

        Raises:
        SearchIndexingError - if any item failed to index or a general
            indexing error occurred
        """
        error_list = []
        searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
        if not searcher:
            # Search is not enabled/configured - nothing to do.
            return

        structure_key = cls.normalize_structure_key(structure_key)
        location_info = cls._get_location_info(structure_key)

        # Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `prepare_item_index`
        indexed_count = {
            "count": 0
        }

        # indexed_items is a list of all the items that we wish to remain in the
        # index, whether or not we are planning to actually update their index.
        # This is used in order to build a query to remove those items not in this
        # list - those are ready to be destroyed
        indexed_items = set()

        # items_index is a list of all the items index dictionaries.
        # it is used to collect all indexes and index them using bulk API,
        # instead of per item index API call.
        items_index = []

        def get_item_location(item):
            """
            Gets the version agnostic item location
            """
            return item.location.version_agnostic().replace(branch=None)

        def prepare_item_index(item, skip_index=False, groups_usage_info=None):
            """
            Add this item to the items_index and indexed_items list

            Arguments:
            item - item to add to index, its children will be processed recursively

            skip_index - simply walk the children in the tree, the content change is
                older than the REINDEX_AGE window and would have been already indexed.
                This should really only be passed from the recursive child calls when
                this method has determined that it is safe to do so

            Returns:
            item_content_groups - content groups assigned to indexed item
            """
            is_indexable = hasattr(item, "index_dictionary")
            item_index_dictionary = item.index_dictionary() if is_indexable else None
            # if it's not indexable and it does not have children, then ignore
            if not item_index_dictionary and not item.has_children:
                return

            item_content_groups = None

            # For split_test blocks, record which content group each child
            # (and that child's components) belongs to, so search results can
            # later be filtered by the user's group membership.
            if item.category == "split_test":
                split_partition = item.get_selected_partition()
                for split_test_child in item.get_children():
                    if split_partition:
                        for group in split_partition.groups:
                            group_id = unicode(group.id)
                            child_location = item.group_id_to_child.get(group_id, None)
                            if child_location == split_test_child.location:
                                groups_usage_info.update({
                                    unicode(get_item_location(split_test_child)): [group_id],
                                })
                                for component in split_test_child.get_children():
                                    groups_usage_info.update({
                                        unicode(get_item_location(component)): [group_id]
                                    })

            if groups_usage_info:
                item_location = get_item_location(item)
                item_content_groups = groups_usage_info.get(unicode(item_location), None)

            item_id = unicode(cls._id_modifier(item.scope_ids.usage_id))
            # Record the id even if we end up skipping the re-index, so this
            # item is not purged by remove_deleted_items below.
            indexed_items.add(item_id)
            if item.has_children:
                # determine if it's okay to skip adding the children herein based upon how recently any may have changed
                skip_child_index = skip_index or \
                    (triggered_at is not None and (triggered_at - item.subtree_edited_on) > reindex_age)
                children_groups_usage = []
                for child_item in item.get_children():
                    if modulestore.has_published_version(child_item):
                        children_groups_usage.append(
                            prepare_item_index(
                                child_item,
                                skip_index=skip_child_index,
                                groups_usage_info=groups_usage_info
                            )
                        )
                # If any child is visible to all groups (None), treat the
                # parent as visible to all groups as well.
                if None in children_groups_usage:
                    item_content_groups = None

            if skip_index or not item_index_dictionary:
                return

            item_index = {}
            # if it has something to add to the index, then add it
            try:
                item_index.update(location_info)
                item_index.update(item_index_dictionary)
                item_index['id'] = item_id
                if item.start:
                    item_index['start_date'] = item.start
                item_index['content_groups'] = item_content_groups if item_content_groups else None
                item_index.update(cls.supplemental_fields(item))
                items_index.append(item_index)
                indexed_count["count"] += 1
                return item_content_groups
            except Exception as err:  # pylint: disable=broad-except
                # broad exception so that index operation does not fail on one item of many
                log.warning('Could not index item: %s - %r', item.location, err)
                error_list.append(_('Could not index item: {}').format(item.location))

        try:
            with modulestore.branch_setting(ModuleStoreEnum.RevisionOption.published_only):
                structure = cls._fetch_top_level(modulestore, structure_key)
                groups_usage_info = cls.fetch_group_usage(modulestore, structure)

                # First perform any additional indexing from the structure object
                cls.supplemental_index_information(modulestore, structure)

                # Now index the content
                for item in structure.get_children():
                    prepare_item_index(item, groups_usage_info=groups_usage_info)
                # Bulk-index everything collected, then purge index entries
                # that no longer correspond to an item in the structure.
                searcher.index(cls.DOCUMENT_TYPE, items_index)
                cls.remove_deleted_items(searcher, structure_key, indexed_items)
        except Exception as err:  # pylint: disable=broad-except
            # broad exception so that index operation does not prevent the rest of the application from working
            log.exception(
                "Indexing error encountered, courseware index may be out of date %s - %r",
                structure_key,
                err
            )
            error_list.append(_('General indexing error occurred'))

        if error_list:
            raise SearchIndexingError('Error(s) present during indexing', error_list)

        return indexed_count["count"]
Esempio n. 47
0
    def index_course(cls, modulestore, course_key, triggered_at=None, reindex_age=REINDEX_AGE):
        """
        Process course for indexing

        Walks the published course tree, indexing every item that exposes an
        `index_dictionary`, then removes from the search index any item that is
        no longer present in the course.

        Arguments:
        modulestore - modulestore object used to load the published course tree

        course_key (CourseKey) - course identifier

        triggered_at (datetime) - provides time at which indexing was triggered;
            useful for index updates - only things changed recently from that date
            (within REINDEX_AGE above ^^) will have their index updated, others skip
            updating their index but are still walked through in order to identify
            which items may need to be removed from the index
            If None, then a full reindex takes place

        reindex_age (timedelta) - window within which a subtree edit counts as
            recent enough to re-index when triggered_at is supplied

        Returns:
        Number of items that have been added to the index, or None when no
        search engine is configured

        Raises:
        SearchIndexingError - when one or more items could not be indexed
        """
        error_list = []
        searcher = SearchEngine.get_search_engine(INDEX_NAME)
        if not searcher:
            # Search is not configured/enabled - nothing to do
            return

        location_info = {
            "course": unicode(course_key),
        }

        # Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `index_item`
        # (Python 2 has no `nonlocal`, so a mutable container is used instead)
        indexed_count = {
            "count": 0
        }

        # indexed_items is a list of all the items that we wish to remain in the
        # index, whether or not we are planning to actually update their index.
        # This is used in order to build a query to remove those items not in this
        # list - those are ready to be destroyed
        indexed_items = set()

        def index_item(item, skip_index=False):
            """
            Add this item to the search index and indexed_items list

            Arguments:
            item - item to add to index, its children will be processed recursively

            skip_index - simply walk the children in the tree, the content change is
                older than the REINDEX_AGE window and would have been already indexed.
                This should really only be passed from the recursive child calls when
                this method has determined that it is safe to do so
            """
            is_indexable = hasattr(item, "index_dictionary")
            item_index_dictionary = item.index_dictionary() if is_indexable else None
            # if it's not indexable and it does not have children, then ignore
            if not item_index_dictionary and not item.has_children:
                return

            item_id = unicode(item.scope_ids.usage_id)
            # Record the id even when skip_index is set, so remove_deleted_items
            # does not treat a still-present item as deleted
            indexed_items.add(item_id)
            if item.has_children:
                # determine if it's okay to skip adding the children herein based upon how recently any may have changed
                skip_child_index = skip_index or \
                    (triggered_at is not None and (triggered_at - item.subtree_edited_on) > reindex_age)
                for child_item in item.get_children():
                    index_item(child_item, skip_index=skip_child_index)

            if skip_index or not item_index_dictionary:
                return

            item_index = {}
            # if it has something to add to the index, then add it
            try:
                item_index.update(location_info)
                item_index.update(item_index_dictionary)
                item_index['id'] = item_id
                if item.start:
                    # expose the scheduled start so searches can filter on availability
                    item_index['start_date'] = item.start

                searcher.index(DOCUMENT_TYPE, item_index)
                indexed_count["count"] += 1
            except Exception as err:  # pylint: disable=broad-except
                # broad exception so that index operation does not fail on one item of many
                log.warning('Could not index item: %s - %r', item.location, err)
                error_list.append(_('Could not index item: {}').format(item.location))

        def remove_deleted_items():
            """
            remove any item that is present in the search index that is not present in updated list of indexed items
            as we find items we can shorten the set of items to keep
            """
            # exclude_ids filters out everything we walked above; what remains
            # in the index for this course must have been deleted from the course
            response = searcher.search(
                doc_type=DOCUMENT_TYPE,
                field_dictionary={"course": unicode(course_key)},
                exclude_ids=indexed_items
            )
            result_ids = [result["data"]["id"] for result in response["results"]]
            for result_id in result_ids:
                searcher.remove(DOCUMENT_TYPE, result_id)

        try:
            # Only the published branch is indexed - draft content stays out of search
            with modulestore.branch_setting(ModuleStoreEnum.RevisionOption.published_only):
                course = modulestore.get_course(course_key, depth=None)
                for item in course.get_children():
                    index_item(item)
                # Must run after the walk so indexed_items is fully populated
                remove_deleted_items()
        except Exception as err:  # pylint: disable=broad-except
            # broad exception so that index operation does not prevent the rest of the application from working
            log.exception(
                "Indexing error encountered, courseware index may be out of date %s - %r",
                course_key,
                err
            )
            error_list.append(_('General indexing error occurred'))

        if error_list:
            raise SearchIndexingError('Error(s) present during indexing', error_list)

        return indexed_count["count"]
Esempio n. 48
0
 def engine(cls):
     """
     Return course team search engine (if feature is enabled).

     Returns None when the search feature is switched off.
     """
     if not cls.search_is_enabled():
         return None
     return SearchEngine.get_search_engine(index=cls.INDEX_NAME)
Esempio n. 49
0
    def index_about_information(cls, modulestore, course):
        """
        Add the given course to the course discovery index

        Arguments:
        modulestore - modulestore object to use for operations

        course - course object from which to take properties, locate about information
        """
        searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
        if not searcher:
            # Course discovery search is not configured - nothing to do
            return

        course_id = unicode(course.id)
        course_info = {
            'id': course_id,
            'course': course_id,
            'content': {},
            'image_url': course_image_url(course),
        }

        # Gather the raw data of every 'about' module belonging to this course,
        # keyed by the module's location name
        about_dictionary = {
            about_item.location.name: about_item.data
            for about_item in modulestore.get_items(course.id, qualifiers={"category": "about"})
        }

        about_context = {
            "course": course,
            "about_dictionary": about_dictionary,
        }

        for about_information in cls.ABOUT_INFORMATION_TO_INCLUDE:
            # Broad exception handler so that a single bad property does not scupper the collection of others
            try:
                section_content = about_information.get_value(**about_context)
            except:  # pylint: disable=bare-except
                section_content = None
                log.warning(
                    "Course discovery could not collect property %s for course %s",
                    about_information.property_name,
                    course_id,
                    exc_info=True,
                )

            if not section_content:
                continue

            property_name = about_information.property_name
            if about_information.index_flags & AboutInfo.ANALYSE:
                # HTML is stripped down to plain text before analysis
                analyse_content = section_content
                if isinstance(section_content, basestring):
                    analyse_content = strip_html_content_to_text(section_content)
                course_info['content'][property_name] = analyse_content
                if property_name == "more_info":
                    course_info[property_name] = analyse_content
            if about_information.index_flags & AboutInfo.PROPERTY:
                # Note: may intentionally overwrite the value set by the ANALYSE branch
                course_info[property_name] = section_content

        # Broad exception handler to protect around and report problems with indexing
        try:
            searcher.index(cls.DISCOVERY_DOCUMENT_TYPE, [course_info])
        except:  # pylint: disable=bare-except
            log.exception(
                "Course discovery indexing error encountered, course discovery index may be out of date %s",
                course_id,
            )
            raise

        log.debug(
            "Successfully added %s course to the course discovery index",
            course_id
        )