Beispiel #1
0
    def delete_by_query(self, doc_type='', query=MatchAll()):
        """Delete the documents matching ``query`` through the ``_delete_by_query`` API.

        See https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-delete-by-query.html

        .. attention ::
            This function must be called with great care!

        Nothing happens when ``self.connected_to_es`` is falsy. The number of
        deleted documents reported by the response is written to ``self.logger``.

        :param doc_type: the document type
        :type doc_type: str
        :param query: the query matching all documents to be deleted
        :type query: elasticsearch_dsl.query.Query
        :raises NeedIndex: if ``self.index_exists`` is falsy
        """

        if self.connected_to_es:
            if not self.index_exists:
                raise NeedIndex()

            response = self.es.delete_by_query(
                index=self.index, doc_type=doc_type, body={'query': query})

            deleted_count = response['deleted']
            self.logger.info(
                'delete_by_query {}s ({})'.format(doc_type, deleted_count))
Beispiel #2
0
    def qc(self, es, index):
        """Compute quality-control metrics over every document of ``index``.

        The index is scanned exactly once with a match-all query.

        :param es: connection handed to ``Search().using``
        :param index: name of the index to scan
        :return: dict with the keys ``efo.count``, ``efo.top`` (sorted labels),
            ``efo.top.count`` and ``efo.missing_description.count``
        """
        self.logger.info("Starting QC")
        # number of EFO terms
        efo_term_count = 0

        # top level terms (i.e. categories)
        efo_top_levels = []

        # terms without a description
        efo_missing_description_count = 0

        # loop over all efo terms and calculate the metrics
        # Note: try to avoid doing this more than once!
        for efo_term in Search().using(es).index(index).query(MatchAll()).scan():
            efo_term_count += 1

            # path_labels is a list of lists of all paths to the root;
            # top level terms are those with one list of one item (itself)
            path_labels = efo_term["path_labels"]
            if len(path_labels) == 1 and len(path_labels[0]) == 1:
                efo_top_levels.append(efo_term["label"])

            # identity check against None instead of ``== None``
            definition = efo_term["definition"]
            if definition is None or len(definition.strip()) == 0:
                efo_missing_description_count += 1

        # put the metrics into a single dict
        metrics = {
            "efo.count": efo_term_count,
            "efo.top": sorted(efo_top_levels),
            "efo.top.count": len(efo_top_levels),
            "efo.missing_description.count": efo_missing_description_count,
        }

        # return the metrics to the caller so they can write to file or further compare
        self.logger.info("Finished QC")
        return metrics
def make_query(query, filters, page, sort_by):
    """Run a paginated search and return the response.

    A query-string search is built when ``query`` is truthy, otherwise a
    match-all search. When ``sort_by`` is falsy it defaults to ``"relevance"``
    for query-string searches and to ``DEFAULT_SORT_BY`` otherwise.

    :param query: user query string (escaped with ``escape_query``), may be falsy
    :param filters: clauses passed as ``must`` of a ``bool`` filter, may be falsy
    :param page: 1-based page number; each page holds 20 results
    :param sort_by: key looked up in ``SORT_BY``
    :return: the executed search response, or ``None`` on ``ConnectionError``
    """
    page_size = 20
    try:
        client = Elasticsearch()
        s = Search(client, index=app.config['INDEX'])

        if query:
            s = s.query(QueryString(query=escape_query(query)))
            if not sort_by:
                sort_by = "relevance"
        else:
            s = s.query(MatchAll())
            if not sort_by:
                sort_by = DEFAULT_SORT_BY

        # NOTE(review): DEFAULT_SORT_BY is used above as a SORT_BY key and here
        # as the fallback *entry*; confirm that an unknown sort_by cannot reach
        # the ['value'] lookup with a plain key.
        s = s.sort(SORT_BY.get(sort_by, DEFAULT_SORT_BY)['value'])

        start = (page - 1) * page_size
        end = start + page_size
        s = s[start:end]

        if filters:
            s = s.filter('bool', must=filters)

        return s.execute()
    # "except X, name" is Python-2-only syntax; this form is valid on 2 and 3.
    except ConnectionError:
        return None
Beispiel #4
0
    def test_hidden_forums_give_no_results_if_user_not_allowed(self):
        """Content of a hidden forum only shows up for members of its group."""

        if not self.manager.connected_to_es:
            return

        text = "test"

        def count_hits():
            """Query the search view for ``text`` and return the total hit count."""
            result = self.client.get(reverse("search:query") + "?q=" + text,
                                     follow=False)
            self.assertEqual(result.status_code, 200)
            return result.context["object_list"].execute().hits.total

        # 1. Create a hidden forum belonging to a hidden staff group.
        group = Group.objects.create(name="Les illuminatis anonymes de ZdS")
        _, hidden_forum = create_category_and_forum(group)

        self.staff.groups.add(group)
        self.staff.save()

        topic = TopicFactory(forum=hidden_forum, author=self.staff, title=text)
        post = PostFactory(topic=topic, author=self.user, position=1)
        post.text = post.text_html = text
        post.save()

        for model in (Topic, Post):
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        indexed = self.manager.setup_search(
            Search().query(MatchAll())).execute()
        self.assertEqual(len(indexed), 2)  # indexing ok

        # 2. Search without being logged in: no result
        self.assertEqual(count_hits(), 0)

        # 3. Log in as a user who is not a member of the group: no result
        self.client.force_login(self.user)
        self.assertEqual(count_hits(), 0)

        # 4. Log in as staff: both the topic and the post are found
        self.client.logout()
        self.client.force_login(self.staff)
        self.assertEqual(count_hits(), 2)  # ok !
Beispiel #5
0
 def scan(cls):
     """
     Scan all documents within this index using a match-all query.
     :return: whatever ``scan()`` of the index search returns
              (presumably a lazy iterable of documents -- TODO confirm)
     """
     return cls._index.search().query(MatchAll()).scan()
Beispiel #6
0
async def test_delete(index_name):
    """Check that ``delete()`` removes exactly the documents matched by the query."""

    def search():
        # a fresh Search per step, as each call chain starts from scratch
        return Search(index=index_name)

    assert await search().count() == 3

    # deleting by term removes a single document
    term_search = search().query('term', value=1).params(refresh=True)
    await term_search.delete()
    assert await search().count() == 2

    # deleting with match-all empties the index
    everything = search().query(MatchAll()).params(refresh=True)
    await everything.delete()
    assert await search().count() == 0
Beispiel #7
0
    def qc(self, es, index):
        """Count the documents of ``index`` and return the count as a metrics dict.

        :param es: connection handed to ``Search().using``
        :param index: name of the index to scan
        :return: dict with the single key ``gene.count``
        """
        # Note: try to avoid scanning the index more than once!
        hits = Search().using(es).index(index).query(MatchAll()).scan()

        # number of gene entries
        gene_count = sum(1 for _ in hits)

        # all metrics live in a single dict
        return {"gene.count": gene_count}
    def qc(self, es, index):
        """Count the documents of ``index`` and return the count as a metrics dict.

        :param es: connection handed to ``Search().using``
        :param index: name of the index to scan
        :return: dict with the single key ``reactome.count``
        """
        self.logger.info("Starting QC")

        # Note: try to avoid scanning the index more than once!
        hits = Search().using(es).index(index).query(MatchAll()).scan()

        # number of reactions
        reaction_count = sum(1 for _ in hits)

        # all metrics live in a single dict
        metrics = {"reactome.count": reaction_count}

        self.logger.info("Finished QC")
        return metrics
Beispiel #9
0
    def qc(self, es, index):
        """Count the documents of ``index``, logging progress every 1000 entries.

        :param es: connection handed to ``Search().using``
        :param index: name of the index to scan
        :return: dict with the single key ``association.count``
        """
        # Note: try to avoid scanning the index more than once!
        hits = Search().using(es).index(index).query(MatchAll()).scan()

        # number of association entries; stays 0 when the scan yields nothing
        association_count = 0
        for association_count, _ in enumerate(hits, start=1):
            if association_count % 1000 == 0:
                self.logger.debug("checking %d", association_count)

        # all metrics live in a single dict
        return {"association.count": association_count}
Beispiel #10
0
    def test_hidden_post_are_not_result(self):
        """A post stops appearing in search results once it has been hidden."""

        if not self.manager.connected_to_es:
            return

        text = 'test'

        def search_posts():
            """Search ``text`` restricted to posts and return the executed response."""
            url = (reverse('search:query') + '?q=' + text +
                   '&models=' + Post.get_es_document_type())
            result = self.client.get(url, follow=False)
            self.assertEqual(result.status_code, 200)
            return result.context['object_list'].execute()

        # 1. Index and test search:
        topic = TopicFactory(forum=self.forum, author=self.user, title=text)
        post_1 = PostFactory(topic=topic, author=self.user, position=1)
        post_1.text = post_1.text_html = text
        post_1.save()

        for model in (Topic, Post):
            self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        indexed = self.manager.setup_search(
            Search().query(MatchAll())).execute()
        self.assertEqual(len(indexed), 2)  # indexing ok

        # reload the post from the database before comparing ids
        post_1 = Post.objects.get(pk=post_1.pk)

        response = search_posts()
        self.assertEqual(response.hits.total, 1)
        self.assertEqual(response[0].meta.id, post_1.es_id)

        # 2. Hide, reindex and search again:
        post_1.hide_comment_by_user(self.staff,
                                    'Un abus de pouvoir comme un autre ;)')
        self.manager.refresh_index()

        response = search_posts()
        self.assertEqual(response.hits.total, 0)  # nothing in the results
Beispiel #11
0
    def qc(self, es, index):
        """Count the documents of ``index`` and return the count as a metrics dict.

        :param es: connection handed to ``Search().using``
        :param index: name of the index to scan
        :return: dict with the single key ``hpa.count``
        """
        self.logger.info("Starting QC")

        # Note: try to avoid scanning the index more than once!
        hits = Search().using(es).index(index).query(MatchAll()).scan()

        # number of hpa entries
        hpa_count = sum(1 for _ in hits)

        # all metrics live in a single dict
        metrics = {"hpa.count": hpa_count}

        self.logger.info("Finished QC")
        return metrics
Beispiel #12
0
    def merge_data(self, genes, es, r_server, data_config, es_config):
        """Load every row of the ``es_config.ens.name`` index into ``genes``.

        Rows whose ``id`` is already in ``genes`` update the existing gene; other
        rows get a new ``Gene``. Afterwards ``_clean_non_reference_genes`` is
        applied and the gene statistics are logged.

        ``r_server`` and ``data_config`` are not used by this method.
        """

        index = es_config.ens.name

        hits = Search().using(es).index(index).query(MatchAll()).scan()
        for row in hits:
            # reuse the known gene when there is one, otherwise start a new one
            known = row['id'] in genes
            gene = genes.get_gene(row['id']) if known else Gene()
            self.load_ensembl_data(gene, row)
            genes.add_gene(gene)

        self._clean_non_reference_genes(genes)

        stats_message = "STATS AFTER ENSEMBL PARSING:\n" + genes.get_stats()
        self._logger.info(stats_message)
Beispiel #13
0
    def qc(self, es, index):
        """
        Run QC on the Ensembl Elasticsearch index by counting its documents.

        Returns a dictionary mapping the metric name ``ensembl.count`` to the
        number of documents found.
        """
        self.logger.info("Starting QC")

        # Note: try to avoid scanning the index more than once!
        hits = Search().using(es).index(index).query(MatchAll()).scan()

        # number of genes
        ensembl_count = sum(1 for _ in hits)

        # all metrics live in a single dict
        metrics = {"ensembl.count": ensembl_count}

        self.logger.info("Finished QC")
        return metrics
Beispiel #14
0
    def qc(self, es, index):
        """Run QC on the given index by counting its documents.

        Progress is logged at debug level every 1000 entries. Returns a
        dictionary mapping the metric name ``uniprot.count`` to the number of
        documents found.
        """
        self.logger.info("Starting QC")

        # Note: try to avoid scanning the index more than once!
        hits = Search().using(es).index(index).query(MatchAll()).scan()

        # number of uniprot entries; stays 0 when the scan yields nothing
        uniprot_count = 0
        for uniprot_count, _ in enumerate(hits, start=1):
            if uniprot_count % 1000 == 0:
                self.logger.debug("QC of %d uniprot entries", uniprot_count)

        # all metrics live in a single dict
        metrics = {"uniprot.count": uniprot_count}

        self.logger.info("Finished QC")
        return metrics
Beispiel #15
0
    def merge_data(self, genes, es, r_server, data_config, es_config):
        """Merge the entries of the ``es_config.uni.name`` index into ``genes``.

        Each entry is decoded, then attached to the first of its Ensembl gene
        ids that is present in ``genes``. Entries without any Ensembl
        cross-reference are recorded in ``self.missing_ensembl``. Missing
        reactome and Ensembl ids are reported as warnings at the end.

        ``r_server`` and ``data_config`` are not used by this method.
        """

        index = es_config.uni.name
        reactome_retriever = ReactomeRetriever(es, es_config.rea.name)

        retrieved = 0
        hits = Search().using(es).index(index).query(MatchAll()).scan()
        for hit in hits:
            # entries are base 64 encoded json - they need decoding
            # TODO access the source directly
            # NOTE(review): jsonpickle.decode can instantiate arbitrary
            # objects; this is only safe if the index content is trusted.
            seqrec = jsonpickle.decode(base64.b64decode(hit['entry']))
            retrieved += 1
            if retrieved % 1000 == 0:
                self._logger.info("%i entries retrieved for uniprot" % retrieved)

            xrefs = seqrec.annotations['dbxref_extended']
            if 'Ensembl' not in xrefs:
                self.missing_ensembl.add(seqrec.id)
                continue

            # de-duplicated gene ids referenced by this entry
            ensembl_genes_id = list({
                data_point['value']['gene ID']
                for data_point in xrefs['Ensembl']
            })

            for ensembl_id in ensembl_genes_id:
                if ensembl_id in genes:
                    gene = genes.get_gene(ensembl_id)
                    self.load_uniprot_entry(gene, seqrec, reactome_retriever)
                    genes.add_gene(gene)
                    break
            else:
                # no id of this entry matched a known gene
                self._logger.debug(
                    'Cannot find ensembl id(s) %s coming from uniprot entry %s in available geneset'
                    % (ensembl_genes_id, seqrec.id))

        for reactome_id in sorted(self.missing_reactome):
            self._logger.warning("Unable to find reactome for %s", reactome_id)
        for uniprot_id in sorted(self.missing_ensembl):
            self._logger.warning("Unable to find ensemble for %s", uniprot_id)

        self._logger.info("%i entries retrieved for uniprot" % retrieved)
Beispiel #16
0
def _build_term_query(term, fields=None):
    """Build the query for a search term.

    An empty-string term yields ``MatchAll()``. Any other term yields a
    ``Bool`` query whose ``should`` clauses are an exact ``name.keyword`` match
    boosted by 2 and a ``cross_fields`` multi-match over ``fields``.
    """
    if term == '':
        return MatchAll()

    # Promote exact name match
    exact_name = Match(**{'name.keyword': {'query': term, 'boost': 2}})

    # Cross match fields
    cross_fields = MultiMatch(
        query=term,
        fields=fields,
        type='cross_fields',
        operator='and',
    )

    return Bool(should=[exact_name, cross_fields])
Beispiel #17
0
    def test_boosts(self):
        """Check that each boost setting reorders the search results as expected.

        All boosts are first reset to 1.0; each boost is then raised on its own,
        the result order is checked, and the boost is set back to 1.0.
        """

        if not self.manager.connected_to_es:
            return

        text = 'test'

        def run_search(expected_total, models=None):
            """Search ``text`` (optionally limited to ``models``), check the
            status code and total number of hits, and return the response."""
            url = reverse('search:query') + '?q=' + text
            if models is not None:
                url += '&models=' + models
            result = self.client.get(url, follow=False)
            self.assertEqual(result.status_code, 200)
            response = result.context['object_list'].execute()
            self.assertEqual(response.hits.total, expected_total)
            return response

        # 1. Create topics (with identical titles), posts (with identical texts), an article and a tuto
        sticky_topic = TopicFactory(forum=self.forum, author=self.user)
        sticky_topic.title = text
        sticky_topic.subtitle = ''
        sticky_topic.is_solved = True
        sticky_topic.is_sticky = True
        sticky_topic.save()

        first_post = PostFactory(topic=sticky_topic, author=self.user,
                                 position=1)
        first_post.text = first_post.text_html = text
        first_post.save()

        useful_post = PostFactory(topic=sticky_topic, author=self.user,
                                  position=2)
        useful_post.text = useful_post.text_html = text
        useful_post.is_useful = True
        useful_post.like = 5
        useful_post.dislike = 2  # l/d ratio above 1
        useful_post.save()

        locked_topic = TopicFactory(forum=self.forum, author=self.user,
                                    title=text)
        locked_topic.title = text
        locked_topic.subtitle = ''
        locked_topic.is_locked = True
        locked_topic.save()

        disliked_post = PostFactory(topic=locked_topic, author=self.user,
                                    position=1)
        disliked_post.text = disliked_post.text_html = text
        disliked_post.like = 2
        disliked_post.dislike = 5  # l/d ratio below 1
        disliked_post.save()

        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto_draft = tuto.load_version()

        tuto.title = text
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft.repo_update_top_container(text, tuto.slug, text, text)

        chapter = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter.repo_update(text, 'Who cares ?', 'Same here')
        ExtractFactory(container=chapter, db_object=tuto)

        published_tuto = publish_content(tuto, tuto_draft,
                                         is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published_tuto
        tuto.save()

        article = PublishedContentFactory(type='ARTICLE', title=text)
        published_article = PublishedContent.objects.get(content_pk=article.pk)

        opinion_not_picked = PublishedContentFactory(type='OPINION',
                                                     title=text)
        published_opinion_not_picked = PublishedContent.objects.get(
            content_pk=opinion_not_picked.pk)

        opinion_picked = PublishedContentFactory(type='OPINION', title=text)
        opinion_picked.sha_picked = opinion_picked.sha_draft
        opinion_picked.date_picked = datetime.datetime.now()
        opinion_picked.save()

        published_opinion_picked = PublishedContent.objects.get(
            content_pk=opinion_picked.pk)

        for model in self.indexable:
            if model is not FakeChapter:
                self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        indexed = self.manager.setup_search(
            Search().query(MatchAll())).execute()
        self.assertEqual(len(indexed), 10)

        # 2. Reset all boosts to 1
        boosts = settings.ZDS_APP['search']['boosts']
        for doc_type in boosts:
            for key in boosts[doc_type]:
                boosts[doc_type][key] = 1.0

        # 3. Test posts
        post_boosts = boosts['post']

        response = run_search(3, Post.get_es_document_type())

        # scores are equal without boost:
        self.assertTrue(response[0].meta.score == response[1].meta.score ==
                        response[2].meta.score)

        post_boosts['if_first'] = 2.0

        response = run_search(3, Post.get_es_document_type())
        self.assertTrue(response[0].meta.score == response[1].meta.score >
                        response[2].meta.score)
        # post 2 is the only one not first
        self.assertEqual(response[2].meta.id, str(useful_post.pk))

        post_boosts['if_first'] = 1.0
        post_boosts['if_useful'] = 2.0

        response = run_search(3, Post.get_es_document_type())
        self.assertTrue(response[0].meta.score > response[1].meta.score ==
                        response[2].meta.score)
        # post 2 is useful
        self.assertEqual(response[0].meta.id, str(useful_post.pk))

        post_boosts['if_useful'] = 1.0
        post_boosts['ld_ratio_above_1'] = 2.0

        response = run_search(3, Post.get_es_document_type())
        self.assertTrue(response[0].meta.score > response[1].meta.score ==
                        response[2].meta.score)
        # post 2 has a l/d ratio of 5/2
        self.assertEqual(response[0].meta.id, str(useful_post.pk))

        post_boosts['ld_ratio_above_1'] = 1.0
        post_boosts['ld_ratio_below_1'] = 2.0  # no one would do that in real life

        response = run_search(3, Post.get_es_document_type())
        self.assertTrue(response[0].meta.score > response[1].meta.score ==
                        response[2].meta.score)
        # post 3 has a l/d ratio of 2/5
        self.assertEqual(response[0].meta.id, str(disliked_post.pk))

        post_boosts['ld_ratio_below_1'] = 1.0

        # 4. Test topics
        topic_boosts = boosts['topic']

        response = run_search(2, Topic.get_es_document_type())

        # scores are equal without boost:
        self.assertTrue(response[0].meta.score == response[1].meta.score)

        topic_boosts['if_sticky'] = 2.0

        response = run_search(2, Topic.get_es_document_type())
        self.assertTrue(response[0].meta.score > response[1].meta.score)
        # topic 1 is sticky
        self.assertEqual(response[0].meta.id, str(sticky_topic.pk))

        topic_boosts['if_sticky'] = 1.0
        topic_boosts['if_solved'] = 2.0

        response = run_search(2, Topic.get_es_document_type())
        self.assertTrue(response[0].meta.score > response[1].meta.score)
        # topic 1 is solved
        self.assertEqual(response[0].meta.id, str(sticky_topic.pk))

        topic_boosts['if_solved'] = 1.0
        topic_boosts['if_locked'] = 2.0  # no one would do that in real life

        response = run_search(2, Topic.get_es_document_type())
        self.assertTrue(response[0].meta.score > response[1].meta.score)
        # topic 2 is locked
        self.assertEqual(response[0].meta.id, str(locked_topic.pk))

        topic_boosts['if_locked'] = 1.0

        # 5. Test published contents
        content_boosts = boosts['publishedcontent']

        response = run_search(5, 'content')

        # scores are equal without boost:
        self.assertTrue(
            response[0].meta.score == response[1].meta.score ==
            response[2].meta.score == response[3].meta.score ==
            response[4].meta.score)

        content_boosts['if_article'] = 2.0

        response = run_search(5, 'content')
        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id, str(published_article.pk))

        content_boosts['if_article'] = 1.0
        content_boosts['if_tutorial'] = 2.0

        response = run_search(5, 'content')
        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id, str(published_tuto.pk))

        content_boosts['if_tutorial'] = 1.0
        content_boosts['if_opinion'] = 2.0
        content_boosts['if_opinion_not_picked'] = 4.0
        # Note: in "real life", unpicked opinion would get a boost < 1.

        response = run_search(5, 'content')
        self.assertTrue(response[0].meta.score > response[1].meta.score >
                        response[2].meta.score)
        # the unpicked opinion comes first, the picked one second
        self.assertEqual(response[0].meta.id,
                         str(published_opinion_not_picked.pk))
        self.assertEqual(response[1].meta.id, str(published_opinion_picked.pk))

        content_boosts['if_opinion'] = 1.0
        content_boosts['if_opinion_not_picked'] = 1.0
        content_boosts['if_medium_or_big_tutorial'] = 2.0

        response = run_search(5, 'content')
        self.assertTrue(response[0].meta.score > response[1].meta.score)
        self.assertEqual(response[0].meta.id, str(published_tuto.pk))

        content_boosts['if_medium_or_big_tutorial'] = 1.0

        # 6. Test global boosts
        # NOTE: scores are NOT the same for all documents; small differences exist

        for model in self.indexable:
            doc_type = model.get_es_document_type()

            # set a huge number to overcome the small differences:
            boosts[doc_type]['global'] = 10.0

            response = run_search(10)
            self.assertEqual(response[0].meta.doc_type, doc_type)

            boosts[doc_type]['global'] = 1.0
Beispiel #18
0
    def test_category_and_subcategory_impact_search(self):
        """Filtering the search by subcategory only returns the matching content."""

        if not self.manager.connected_to_es:
            return

        text = 'Did you ever hear the tragedy of Darth Plagueis The Wise?'

        def publish_tutorial(subcategory):
            """Create and publish a one-chapter tutorial in ``subcategory``.

            Returns the tutorial, its chapter and its published version.
            """
            tuto = PublishableContentFactory(type='TUTORIAL')
            draft = tuto.load_version()

            tuto.title = text
            tuto.authors.add(self.user)
            tuto.subcategory.add(subcategory)
            tuto.save()

            draft.description = text
            draft.repo_update_top_container(text, tuto.slug, text, text)

            chapter = ContainerFactory(parent=draft, db_object=tuto)
            extract = ExtractFactory(container=chapter, db_object=tuto)
            extract.repo_update(text, text)

            published = publish_content(tuto, draft, is_major_update=True)

            tuto.sha_public = draft.current_version
            tuto.sha_draft = draft.current_version
            tuto.public_version = published
            tuto.save()
            return tuto, chapter, published

        def run_search(extra_params=''):
            """Search ``text`` with optional extra query-string parameters."""
            url = reverse('search:query') + '?q=' + text + extra_params
            result = self.client.get(url, follow=False)
            self.assertEqual(result.status_code, 200)
            return result.context['object_list'].execute()

        # 1. Create two contents with different subcategories
        subcategory_1 = SubCategoryFactory(title='category 1')
        subcategory_2 = SubCategoryFactory(title='category 2')

        tuto_1, chapter_1, published_1 = publish_tutorial(subcategory_1)
        tuto_2, chapter_2, published_2 = publish_tutorial(subcategory_2)

        # 2. Index: nothing is indexed yet
        indexed = self.manager.setup_search(
            Search().query(MatchAll())).execute()
        self.assertEqual(len(indexed), 0)

        for model in self.indexable:
            if model is not FakeChapter:
                self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        response = run_search()
        self.assertEqual(response.hits.total, 4)  # Ok

        # 3. Test: each subcategory filter returns its own content and chapter
        # NOTE(review): the parameter is spelled "model" here while other
        # tests use "models" -- confirm this is intended.
        expectations = (
            (subcategory_1, tuto_1, chapter_1, published_1),
            (subcategory_2, tuto_2, chapter_2, published_2),
        )
        for subcategory, tuto, chapter, published in expectations:
            response = run_search('&model=content&subcategory=' +
                                  subcategory.slug)
            self.assertEqual(response.hits.total, 2)

            published_ids = [
                int(r.meta.id) for r in response
                if r.meta.doc_type == 'publishedcontent'
            ]
            self.assertEqual(published_ids[0], published.pk)

            chapter_ids = [
                r.meta.id for r in response if r.meta.doc_type == 'chapter'
            ]
            self.assertEqual(chapter_ids[0], tuto.slug + '__' + chapter.slug)
Beispiel #19
0
    def test_special_case_of_contents(self):
        """Test that the old publishedcontent does not stay when a new one is created.

        A tutorial is published and indexed, then retitled (which triggers a
        change of slug) and published again. After reindexing, the index must
        hold the documents of the second publication only.
        """

        if not self.manager.connected_to_es:
            return

        def search_everything():
            # ``Search.query()`` returns a modified copy rather than mutating
            # the receiver, so the chained result is the one to execute.
            return self.manager.setup_search(Search().query(MatchAll())).execute()

        # 1. Create a middle-tutorial, publish it, then index it
        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft = tuto.load_version()
        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        ExtractFactory(container=chapter1, db_object=tuto)
        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

        self.manager.es_bulk_indexing_of_model(PublishedContent, force_reindexing=True)  # index
        self.manager.refresh_index()

        first_publication = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(first_publication.es_already_indexed)
        self.assertFalse(first_publication.es_flagged)

        results = search_everything()
        self.assertEqual(len(results), 2)  # get 2 results, one for the content and one for the chapter

        self.assertEqual(PublishedContent.objects.count(), 1)

        # 2. Change the title, which will trigger a change in the slug
        tuto = PublishableContent.objects.get(pk=tuto.pk)
        versioned = tuto.load_version(sha=tuto.sha_draft)

        tuto.title = 'un titre complètement différent!'
        tuto.save()

        versioned.repo_update_top_container(tuto.title, tuto.slug, 'osef', 'osef')
        second_publication = publish_content(tuto, versioned, True)

        tuto.sha_public = versioned.current_version
        tuto.sha_draft = versioned.current_version
        tuto.public_version = second_publication
        tuto.save()

        self.assertEqual(PublishedContent.objects.count(), 2)  # now there are two objects ...
        first_publication = PublishedContent.objects.get(pk=first_publication.pk)
        self.assertTrue(first_publication.must_redirect)  # .. including the first one, for redirection

        self.manager.refresh_index()

        results = search_everything()
        self.assertEqual(len(results), 0)  # the old one is gone (and we need to reindex to get the new one)

        # 3. Check if indexation brings the new one, and not the old one
        self.manager.es_bulk_indexing_of_model(PublishedContent, force_reindexing=True)  # index
        self.manager.refresh_index()

        first_publication = PublishedContent.objects.get(pk=first_publication.pk)
        second_publication = PublishedContent.objects.get(pk=second_publication.pk)

        results = search_everything()
        self.assertEqual(len(results), 2)  # Still 2, not 4 !

        # Collect the ids of the indexed "published content" documents, then
        # check that only the second publication is among them.
        published_type = PublishedContent.get_es_document_type()
        indexed_ids = {hit.meta.id for hit in results if hit.meta.doc_type == published_type}

        self.assertIn(second_publication.es_id, indexed_ids)
        self.assertNotIn(first_publication.es_id, indexed_ids)
Beispiel #20
0
    def test_indexation(self):
        """Test the indexation and deletion of the different documents.

        Steps: index one object of each type, check that only flagged objects
        get reindexed, delete a single document, delete by query, delete
        through the ORM, then clear the indexing state of every model.
        """

        if not self.manager.connected_to_es:
            return

        def search_everything():
            # ``Search.query()`` returns a modified copy rather than mutating
            # the receiver, so the chained result is the one to execute.
            return self.manager.setup_search(Search().query(MatchAll())).execute()

        # create a topic with a post
        topic = TopicFactory(forum=self.forum, author=self.user)
        post = PostFactory(topic=topic, author=self.user, position=1)

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertFalse(topic.es_already_indexed)
        self.assertTrue(topic.es_flagged)
        self.assertFalse(post.es_already_indexed)
        self.assertTrue(post.es_flagged)

        # create a middle-tutorial and publish it
        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft = tuto.load_version()
        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        ExtractFactory(container=chapter1, db_object=tuto)
        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = tuto_draft.current_version
        tuto.sha_draft = tuto_draft.current_version
        tuto.public_version = published
        tuto.save()

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

        # 1. index all
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model, force_reindexing=False)
            self.manager.refresh_index()

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertTrue(topic.es_already_indexed)
        self.assertFalse(topic.es_flagged)
        self.assertTrue(post.es_already_indexed)
        self.assertFalse(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(published.es_already_indexed)
        self.assertFalse(published.es_flagged)

        results = search_everything()
        self.assertEqual(len(results), 4)  # get 4 results, one of each type

        must_contain = {'post': False, 'topic': False, 'publishedcontent': False, 'chapter': False}
        id_must_be = {
            'post': str(post.pk),
            'topic': str(topic.pk),
            'publishedcontent': str(published.pk),
            'chapter': tuto.slug + '__' + chapter1.slug
        }

        for hit in results:
            doc_type = hit.meta.doc_type
            must_contain[doc_type] = True
            self.assertEqual(hit.meta.id, id_must_be[doc_type])

        # Check the flags, not the keys: iterating the dict itself yields its
        # (always truthy) string keys, which would make this assertion vacuous.
        self.assertTrue(all(must_contain.values()), must_contain)

        # 2. Test what reindexation will do:
        new_topic = TopicFactory(forum=self.forum, author=self.user)
        new_post = PostFactory(topic=new_topic, author=self.user, position=1)

        pk_of_topics_to_reindex = [item.pk for item in Topic.get_es_indexable(force_reindexing=False)]
        pk_of_posts_to_reindex = [item.pk for item in Post.get_es_indexable(force_reindexing=False)]

        self.assertNotIn(topic.pk, pk_of_topics_to_reindex)
        self.assertIn(new_topic.pk, pk_of_topics_to_reindex)
        self.assertNotIn(post.pk, pk_of_posts_to_reindex)
        self.assertIn(new_post.pk, pk_of_posts_to_reindex)

        for model in self.indexable:  # ok, so let's index that
            if model is FakeChapter:
                continue
            self.manager.es_bulk_indexing_of_model(model, force_reindexing=False)
        self.manager.refresh_index()

        results = search_everything()
        self.assertEqual(len(results), 6)  # good!

        # 3. Test single deletion:
        new_post = Post.objects.get(pk=new_post.pk)

        self.manager.delete_document(new_post)
        self.manager.refresh_index()

        results = search_everything()
        self.assertEqual(len(results), 5)  # one is missing

        for hit in results:
            self.assertTrue(hit.meta.doc_type != Post.get_es_document_type() or hit.meta.id != new_post.es_id)

        # 4. Test "delete_by_query_deletion":
        topic = Topic.objects.get(pk=topic.pk)
        new_topic = Topic.objects.get(pk=new_topic.pk)

        self.manager.delete_by_query(Topic.get_es_document_type(), MatchAll())  # the two topics are deleted
        self.manager.refresh_index()

        results = search_everything()
        self.assertEqual(len(results), 3)

        for hit in results:
            self.assertTrue(hit.meta.doc_type != Topic.get_es_document_type() or hit.meta.id != new_topic.es_id)
            self.assertTrue(hit.meta.doc_type != Topic.get_es_document_type() or hit.meta.id != topic.es_id)

        # 5. Test that the deletion of an object also triggers its deletion in ES
        post = Post.objects.get(pk=post.pk)
        post.delete()
        self.manager.refresh_index()

        results = search_everything()
        self.assertEqual(len(results), 2)

        for hit in results:
            self.assertTrue(hit.meta.doc_type != Post.get_es_document_type() or hit.meta.id != post.es_id)

        # 6. Test full desindexation:
        for model in self.indexable:
            if model is FakeChapter:
                continue
            self.manager.clear_indexing_of_model(model)

        # note "topic" is gone since "post" is gone, due to relationships at the Django level
        new_topic = Topic.objects.get(pk=new_topic.pk)
        new_post = Post.objects.get(pk=new_post.pk)

        self.assertFalse(new_topic.es_already_indexed)
        self.assertTrue(new_topic.es_flagged)
        self.assertFalse(new_post.es_already_indexed)
        self.assertTrue(new_post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)
Beispiel #21
0
 def all(cls):
     """Run a match-all query on ``cls.search()`` and return the executed response."""
     search = cls.search()
     search.query = MatchAll()
     response = search.execute()
     return response
 def get_diseases(self, es):
     """Yield, as a plain dict, each document scanned from the ``self.es_index_efo`` index.

     :param es: connection handed to ``Search().using()``
     """
     search = Search().using(es).index(self.es_index_efo)
     search = search.extra(track_total_hits=True)
     search = search.query(MatchAll())
     for hit in search.scan():
         yield hit.to_dict()
Beispiel #23
0
    def test_change_publishedcontents_impacts_chapter(self):
        """Check that republishing a content replaces its chapter documents in the index."""

        if not self.manager.connected_to_es:
            return

        # 1. Create middle-size content and index it
        needle = 'test'

        content = PublishableContentFactory(type='TUTORIAL')
        draft = content.load_version()

        content.title = needle
        content.authors.add(self.user)
        content.save()

        # change title to be sure it will match
        draft.repo_update_top_container(needle, content.slug, needle, needle)

        first_chapter = ContainerFactory(parent=draft, db_object=content)
        first_chapter.repo_update(needle, needle, needle)
        first_extract = ExtractFactory(container=first_chapter, db_object=content)
        first_extract.repo_update(needle, needle)

        publication = publish_content(content, draft, is_major_update=True)

        content.sha_public = draft.current_version
        content.sha_draft = draft.current_version
        content.public_version = publication
        content.save()

        self.manager.es_bulk_indexing_of_model(PublishedContent)
        self.manager.refresh_index()

        everything = self.manager.setup_search(Search().query(MatchAll())).execute()
        self.assertEqual(len(everything), 2)  # indexing ok

        url = reverse('search:query') + '?q=' + needle + '&models=content'
        page = self.client.get(url, follow=False)
        self.assertEqual(page.status_code, 200)

        found = page.context['object_list'].execute()

        self.assertEqual(found.hits.total, 2)

        chapter_hits = [hit for hit in found if hit.meta.doc_type == 'chapter']
        self.assertEqual(chapter_hits[0].meta.doc_type, FakeChapter.get_es_document_type())
        self.assertEqual(
            chapter_hits[0].meta.id,
            publication.content_public_slug + '__' + first_chapter.slug)

        # 2. Change tuto: delete chapter and insert new one !
        content = PublishableContent.objects.get(pk=content.pk)
        draft = content.load_version()

        draft.children[0].repo_delete()  # chapter 1 is gone !

        other_needle = 'another thing'
        # to prevent a future modification from breaking this test
        self.assertTrue(needle not in other_needle)

        second_chapter = ContainerFactory(parent=draft, db_object=content)
        second_chapter.repo_update(other_needle, other_needle, other_needle)
        second_extract = ExtractFactory(container=second_chapter, db_object=content)
        second_extract.repo_update(other_needle, other_needle)

        publication = publish_content(content, draft, is_major_update=False)

        content.sha_public = draft.current_version
        content.sha_draft = draft.current_version
        content.public_version = publication
        content.save()

        self.manager.es_bulk_indexing_of_model(PublishedContent)
        self.manager.refresh_index()

        everything = self.manager.setup_search(Search().query(MatchAll())).execute()
        self.assertEqual(len(everything), 2)  # 2 objects, not 3 !

        url = reverse('search:query') + '?q=' + needle + '&models=content'
        page = self.client.get(url, follow=False)
        self.assertEqual(page.status_code, 200)

        found = page.context['object_list'].execute()

        non_chapter_hits = [hit for hit in found if hit.meta.doc_type != 'chapter']
        self.assertEqual(found.hits.total, len(non_chapter_hits))  # no chapter found anymore

        url = reverse('search:query') + '?q=' + other_needle + '&models=content'
        page = self.client.get(url, follow=False)

        self.assertEqual(page.status_code, 200)

        found = page.context['object_list'].execute()
        chapter_hits = [hit for hit in found if hit.meta.doc_type == 'chapter']
        self.assertEqual(found.hits.total, 1)
        self.assertEqual(chapter_hits[0].meta.doc_type, FakeChapter.get_es_document_type())
        # got new chapter
        self.assertEqual(
            chapter_hits[0].meta.id,
            publication.content_public_slug + '__' + second_chapter.slug)
Beispiel #24
0
    def test_es_manager(self):
        """Test the behavior of the ``es_manager`` command.

        Runs the ``index_all``, ``clear``, ``setup`` and ``index_flagged``
        sub-commands in turn and checks both the state of the index and the
        indexing flags of the database objects after each of them.
        """

        if not self.index_manager.connected_to_es:
            return

        def search_everything():
            # ``Search.query()`` returns a modified copy rather than mutating
            # the receiver, so the chained result is the one to execute.
            return self.index_manager.setup_search(Search().query(MatchAll())).execute()

        # in the beginning: the void
        self.assertTrue(self.index_manager.index not in self.index_manager.es.cat.indices())

        text = u'Ceci est un texte de test'

        # create a topic with a post
        topic = TopicFactory(forum=self.forum, author=self.user, title=text)
        post = PostFactory(topic=topic, author=self.user, position=1)
        post.text = post.text_html = text
        post.save()

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertFalse(topic.es_already_indexed)
        self.assertTrue(topic.es_flagged)
        self.assertFalse(post.es_already_indexed)
        self.assertTrue(post.es_flagged)

        # create a middle-tutorial and publish it
        tuto = PublishableContentFactory(type='TUTORIAL')
        tuto.authors.add(self.user)
        tuto.save()

        tuto_draft = tuto.load_version()
        chapter1 = ContainerFactory(parent=tuto_draft, db_object=tuto)
        chapter1.repo_update(text, text, text)
        extract1 = ExtractFactory(container=chapter1, db_object=tuto)
        version = extract1.repo_update(text, text)
        published = publish_content(tuto, tuto_draft, is_major_update=True)

        tuto.sha_public = version
        tuto.sha_draft = version
        tuto.public_version = published
        tuto.save()

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

        # 1. test "index-all"
        call_command('es_manager', 'index_all')
        self.assertTrue(self.index_manager.es.indices.exists(self.index_manager.index))
        self.index_manager.index_exists = True

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertTrue(topic.es_already_indexed)
        self.assertFalse(topic.es_flagged)
        self.assertTrue(post.es_already_indexed)
        self.assertFalse(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(published.es_already_indexed)
        self.assertFalse(published.es_flagged)

        results = search_everything()
        self.assertEqual(len(results), 4)  # get 4 results, one of each type

        must_contain = {'post': False, 'topic': False, 'publishedcontent': False, 'chapter': False}
        id_must_be = {
            'post': str(post.pk),
            'topic': str(topic.pk),
            'publishedcontent': str(published.pk),
            'chapter': tuto.slug + '__' + chapter1.slug
        }

        for hit in results:
            doc_type = hit.meta.doc_type
            must_contain[doc_type] = True
            self.assertEqual(hit.meta.id, id_must_be[doc_type])

        # Check the flags, not the keys: iterating the dict itself yields its
        # (always truthy) string keys, which would make this assertion vacuous.
        self.assertTrue(all(must_contain.values()), must_contain)

        # 2. test "clear"
        self.assertTrue(self.index_manager.index in self.index_manager.es.cat.indices())  # index in

        call_command('es_manager', 'clear')
        self.assertFalse(self.index_manager.es.indices.exists(self.index_manager.index))
        self.index_manager.index_exists = False

        # must reset every object
        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertFalse(topic.es_already_indexed)
        self.assertTrue(topic.es_flagged)
        self.assertFalse(post.es_already_indexed)
        self.assertTrue(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertFalse(published.es_already_indexed)
        self.assertTrue(published.es_flagged)

        self.assertTrue(self.index_manager.index not in self.index_manager.es.cat.indices())  # index wiped out !

        # 3. test "setup"
        call_command('es_manager', 'setup')
        self.assertTrue(self.index_manager.es.indices.exists(self.index_manager.index))
        self.index_manager.index_exists = True

        self.assertTrue(self.index_manager.index in self.index_manager.es.cat.indices())  # index back in ...

        results = search_everything()
        self.assertEqual(len(results), 0)  # ... but with nothing in it

        result = self.index_manager.es.indices.get_settings(index=self.index_manager.index)
        settings_index = result[self.index_manager.index]['settings']['index']
        self.assertTrue('analysis' in settings_index)  # custom analyzer was setup

        # 4. test "index-flagged" once ...
        call_command('es_manager', 'index_flagged')

        topic = Topic.objects.get(pk=topic.pk)
        post = Post.objects.get(pk=post.pk)

        self.assertTrue(topic.es_already_indexed)
        self.assertFalse(topic.es_flagged)
        self.assertTrue(post.es_already_indexed)
        self.assertFalse(post.es_flagged)

        published = PublishedContent.objects.get(content_pk=tuto.pk)
        self.assertTrue(published.es_already_indexed)
        self.assertFalse(published.es_flagged)

        results = search_everything()
        self.assertEqual(len(results), 4)  # get the 4 results back
Beispiel #25
0
    def test_basic_search(self):
        """Search a term over every document, then once per configured search group."""

        if not self.manager.connected_to_es:
            return

        # 1. Index and test search:
        needle = 'test'

        first_topic = TopicFactory(forum=self.forum, author=self.user, title=needle)
        first_post = PostFactory(topic=first_topic, author=self.user, position=1)
        first_post.text = needle
        first_post.text_html = needle
        first_post.save()

        # create a middle-size content and publish it
        content = PublishableContentFactory(type='TUTORIAL')
        draft = content.load_version()

        content.title = needle
        content.authors.add(self.user)
        content.save()

        # change title to be sure it will match
        draft.repo_update_top_container(needle, content.slug, needle, needle)

        chapter = ContainerFactory(parent=draft, db_object=content)
        section = ExtractFactory(container=chapter, db_object=content)
        section.repo_update(needle, needle)

        publication = publish_content(content, draft, is_major_update=True)

        content.sha_public = draft.current_version
        content.sha_draft = draft.current_version
        content.public_version = publication
        content.save()

        # nothing has been indexed yet:
        everything = self.manager.setup_search(Search().query(MatchAll())).execute()
        self.assertEqual(len(everything), 0)

        # index
        for model in self.indexable:
            if model is not FakeChapter:
                self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        page = self.client.get(reverse('search:query') + '?q=' + needle, follow=False)
        self.assertEqual(page.status_code, 200)

        found = page.context['object_list'].execute()

        self.assertEqual(found.hits.total, 4)  # get 4 results

        # 2. Test filtering:
        first_topic = Topic.objects.get(pk=first_topic.pk)
        first_post = Post.objects.get(pk=first_post.pk)
        publication = PublishedContent.objects.get(pk=publication.pk)

        expected_ids = {
            'topic': [first_topic.es_id],
            'post': [first_post.es_id],
            'content': [
                publication.es_id,
                publication.content_public_slug + '__' + chapter.slug,
            ],
        }

        group_settings = settings.ZDS_APP['search']['search_groups']
        group_names = list(group_settings.keys())
        models_of_group = {name: spec[1] for name, spec in group_settings.items()}

        for group in group_names:
            url = reverse('search:query') + '?q=' + needle + '&models=' + group
            page = self.client.get(url, follow=False)
            self.assertEqual(page.status_code, 200)

            found = page.context['object_list'].execute()

            # get 1 result of each …
            self.assertEqual(found.hits.total, len(expected_ids[group]))
            for position, hit in enumerate(found):
                # … and only of the right type …
                self.assertIn(hit.meta.doc_type, models_of_group[group])
                # … with the right id !
                self.assertEqual(hit.meta.id, expected_ids[group][position])
 def get_targets(self, es):
     """Yield, as a plain dict, each document scanned from the ``self.es_index_gene`` index.

     :param es: connection handed to ``Search().using()``
     """
     search = Search().using(es).index(self.es_index_gene)
     search = search.extra(track_total_hits=True)
     search = search.query(MatchAll())
     for hit in search.scan():
         yield hit.to_dict()
Beispiel #27
0
    def test_change_topic_impacts_posts(self):
        """Check that retitling or moving a topic is reflected on its indexed posts."""

        if not self.manager.connected_to_es:
            return

        # 1. Create a hidden forum belonging to a hidden group and add staff in it.
        needle = 'test'

        secret_group = Group.objects.create(name='Les illuminatis anonymes de ZdS')
        _, hidden_forum = create_category(secret_group)

        self.staff.groups.add(secret_group)
        self.staff.save()

        # 2. Create a normal topic and index it
        the_topic = TopicFactory(forum=self.forum, author=self.user, title=needle)
        the_post = PostFactory(topic=the_topic, author=self.user, position=1)
        the_post.text = needle
        the_post.text_html = needle
        the_post.save()

        self.manager.es_bulk_indexing_of_model(Topic)
        self.manager.es_bulk_indexing_of_model(Post)
        self.manager.refresh_index()

        everything = self.manager.setup_search(Search().query(MatchAll())).execute()
        self.assertEqual(len(everything), 2)  # indexing ok

        url = reverse('search:query') + '?q=' + needle + '&models=' + Post.get_es_document_type()
        page = self.client.get(url, follow=False)

        self.assertEqual(page.status_code, 200)
        found = page.context['object_list'].execute()
        self.assertEqual(found.hits.total, 1)  # ok
        first_hit = found[0]
        self.assertEqual(first_hit.meta.doc_type, Post.get_es_document_type())
        self.assertEqual(first_hit.forum_pk, self.forum.pk)
        self.assertEqual(first_hit.topic_pk, the_topic.pk)
        self.assertEqual(first_hit.topic_title, the_topic.title)

        # 3. Change topic title and reindex
        the_topic.title = 'new title'
        the_topic.save()

        self.manager.es_bulk_indexing_of_model(Topic)
        self.manager.es_bulk_indexing_of_model(Post)
        self.manager.refresh_index()

        url = reverse('search:query') + '?q=' + needle + '&models=' + Post.get_es_document_type()
        page = self.client.get(url, follow=False)

        self.assertEqual(page.status_code, 200)
        found = page.context['object_list'].execute()
        self.assertEqual(found.hits.total, 1)  # ok

        self.assertEqual(found[0].topic_title, the_topic.title)  # title was changed

        # 4. connect with staff and move topic
        logged_in = self.client.login(username=self.staff.username, password='******')
        self.assertTrue(logged_in)

        move_form = {'move': '', 'forum': hidden_forum.pk, 'topic': the_topic.pk}
        move_response = self.client.post(reverse('topic-edit'), move_form, follow=False)

        self.assertEqual(302, move_response.status_code)

        self.manager.es_bulk_indexing_of_model(Topic)
        self.manager.es_bulk_indexing_of_model(Post)
        self.manager.refresh_index()

        url = reverse('search:query') + '?q=' + needle + '&models=' + Post.get_es_document_type()
        page = self.client.get(url, follow=False)

        self.assertEqual(page.status_code, 200)
        found = page.context['object_list'].execute()
        # Note: without staff, would not get any results (see below)
        self.assertEqual(found.hits.total, 1)

        # post was updated with new forum
        self.assertEqual(found[0].forum_pk, hidden_forum.pk)

        # 5. Topic is now hidden
        self.client.logout()

        url = reverse('search:query') + '?q=' + needle + '&models=' + Post.get_es_document_type()
        page = self.client.get(url, follow=False)

        self.assertEqual(page.status_code, 200)
        found = page.context['object_list'].execute()
        self.assertEqual(found.hits.total, 0)  # ok
Beispiel #28
0
 def get_targets(self, es):
     """Yield, as a string, the id of each document scanned from the ``self.es_index_gene`` index.

     :param es: connection handed to ``Search().using()``
     """
     search = Search().using(es).index(self.es_index_gene)
     search = search.query(MatchAll())
     for hit in search.scan():
         yield str(hit.meta.id)
Beispiel #29
0
    def test_upercase_and_lowercase_search_give_same_results(self):
        """Check that a lowercase and an uppercase query return the same hits, in the same order."""

        if not self.manager.connected_to_es:
            return

        # 1. Index lowercase stuffs
        lower = 'test'

        lower_topic = TopicFactory(forum=self.forum, author=self.user, title=lower)

        shared_tag = TagFactory(title=lower)
        lower_topic.tags.add(shared_tag)
        lower_topic.subtitle = lower
        lower_topic.save()

        lower_post = PostFactory(topic=lower_topic, author=self.user, position=1)
        lower_post.text = lower
        lower_post.text_html = lower
        lower_post.save()

        lower_content = PublishableContentFactory(type='TUTORIAL')
        lower_draft = lower_content.load_version()

        lower_content.title = lower
        lower_content.authors.add(self.user)
        shared_subcategory = SubCategoryFactory(title=lower)
        lower_content.subcategory.add(shared_subcategory)
        lower_content.tags.add(shared_tag)
        lower_content.save()

        lower_draft.description = lower
        lower_draft.repo_update_top_container(lower, lower_content.slug, lower, lower)

        lower_chapter = ContainerFactory(parent=lower_draft, db_object=lower_content)
        lower_extract = ExtractFactory(container=lower_chapter, db_object=lower_content)
        lower_extract.repo_update(lower, lower)

        lower_publication = publish_content(lower_content, lower_draft, is_major_update=True)

        lower_content.sha_public = lower_draft.current_version
        lower_content.sha_draft = lower_draft.current_version
        lower_content.public_version = lower_publication
        lower_content.save()

        # 2. Index uppercase stuffs
        upper = 'TEST'

        upper_topic = TopicFactory(forum=self.forum, author=self.user, title=upper)

        # Note: a constraint forces tags title to be unique
        upper_topic.tags.add(shared_tag)
        upper_topic.subtitle = upper
        upper_topic.save()

        upper_post = PostFactory(topic=upper_topic, author=self.user, position=1)
        upper_post.text = upper
        upper_post.text_html = upper
        upper_post.save()

        upper_content = PublishableContentFactory(type='TUTORIAL')
        upper_draft = upper_content.load_version()

        upper_content.title = upper
        upper_content.authors.add(self.user)
        upper_content.subcategory.add(shared_subcategory)
        upper_content.tags.add(shared_tag)
        upper_content.save()

        upper_draft.description = upper
        upper_draft.repo_update_top_container(upper, upper_content.slug, upper, upper)

        upper_chapter = ContainerFactory(parent=upper_draft, db_object=upper_content)
        upper_extract = ExtractFactory(container=upper_chapter, db_object=upper_content)
        upper_extract.repo_update(upper, upper)

        upper_publication = publish_content(upper_content, upper_draft, is_major_update=True)

        upper_content.sha_public = upper_draft.current_version
        upper_content.sha_draft = upper_draft.current_version
        upper_content.public_version = upper_publication
        upper_content.save()

        # 3. Index and search:
        everything = self.manager.setup_search(Search().query(MatchAll())).execute()
        self.assertEqual(len(everything), 0)

        # index
        for model in self.indexable:
            if model is not FakeChapter:
                self.manager.es_bulk_indexing_of_model(model)
        self.manager.refresh_index()

        page = self.client.get(reverse('search:query') + '?q=' + lower, follow=False)
        self.assertEqual(page.status_code, 200)

        lower_hits = page.context['object_list'].execute()
        self.assertEqual(lower_hits.hits.total, 8)

        page = self.client.get(reverse('search:query') + '?q=' + upper, follow=False)
        self.assertEqual(page.status_code, 200)

        upper_hits = page.context['object_list'].execute()
        self.assertEqual(upper_hits.hits.total, 8)

        # we should get results in the same order!
        for lower_hit, upper_hit in zip(lower_hits, upper_hits):
            self.assertEqual(lower_hit.meta.id, upper_hit.meta.id)
Beispiel #30
0
    def search(self, search):
        """Build an Elasticsearch query from ``search`` and execute it.

        The query matches ``search.words`` against the ``content`` field (or
        matches everything when no words are given), is paged with
        ``search.start`` / ``search.size``, and is then narrowed by the
        optional type, age, language, date, geo-box, draft and category
        criteria. Terms aggregations for category, type, ages and languages
        are attached before the search is executed.

        .. note::
            When ``search.types`` contains only ``'resource'``, ``'event'`` is
            appended to that list **in place** so that past events are
            returned alongside resources.

        :param search: the search criteria; this method reads its ``words``,
            ``sort``, ``start``, ``size``, ``types``, ``ages``, ``languages``,
            ``date``, ``geo_box`` and ``category`` attributes
        :return: the result of calling ``execute()`` on the assembled search
        """
        sort = None if search.sort is None else search.sort.translate()

        if search.words:
            query = MultiMatch(query=search.words, fields=['content'])
        else:
            query = MatchAll()

        elastic_search = Search(index=self.index_name)\
            .doc_type(StarDocument)\
            .query(query)\
            .highlight('content', type='unified', fragment_size=150)

        # Paging window.
        page_end = search.start + search.size
        elastic_search = elastic_search[search.start:page_end]

        # Filter results for type, ages, and languages
        if search.types:
            if set(search.types) == {'resource'}:
                # Include past events in resource search results. This
                # deliberately mutates the caller's list; the branch below
                # relies on seeing both types.
                search.types.append('event')

            elastic_search = elastic_search.filter('terms',
                                                   **{"type": search.types})
        if search.ages:
            elastic_search = elastic_search.filter('terms', ages=search.ages)
        if search.languages:
            elastic_search = elastic_search.filter('terms',
                                                   languages=search.languages)

        # ``search.types`` may be empty or None; normalise once so the
        # comparisons below cannot raise on a missing value.
        selected_types = set(search.types or ())

        if selected_types == {'resource', 'event'}:
            # Include past events in resource search results
            elastic_search = elastic_search.filter(
                'bool',
                should=[
                    self._past_events_filter(),  # Past events OR
                    self._non_events_filter(),  # Date field is empty
                ])
        elif search.date:
            # Filter results by date
            elastic_search = elastic_search.filter(
                'range', date={"gte": _start_of_day(search.date)})
        elif selected_types == {'event'}:
            elastic_search = elastic_search.filter(
                'bool', should=self._future_events_filter())
        else:
            elastic_search = elastic_search.filter(
                'bool', should=self._default_filter())

        if search.geo_box:
            box = search.geo_box
            elastic_search = elastic_search.filter(
                'geo_bounding_box',
                geo_point={
                    "top_left": {
                        "lat": box.top_left.lat,
                        "lon": box.top_left.lon
                    },
                    "bottom_right": {
                        "lat": box.bottom_right.lat,
                        "lon": box.bottom_right.lon
                    }
                })

        if sort is not None:
            elastic_search = elastic_search.sort(sort)

        # Drafts are hidden unless there is a current user whose role holds
        # the edit_resource permission.
        may_see_drafts = (
            'user' in g and g.user
            and Permission.edit_resource in g.user.role.permissions())
        if not may_see_drafts:
            elastic_search = elastic_search.filter(
                Q('bool', must_not=[Q('match', is_draft=True)]))

        if search.category and search.category.id:
            elastic_search = elastic_search.filter(
                'terms', category=[str(search.category.search_path())])
            level = search.category.calculate_level()
            # The include/exclude values are regular expressions over the
            # comma-separated ``category`` field values.
            # NOTE(review): presumably each comma separates one level of the
            # category path -- confirm against the indexing code.
            if level == 0:
                # Values starting with this category's id and a comma, but
                # without two or more commas.
                aggregation = A("terms",
                                field='category',
                                exclude=".*\\,.*\\,.*",
                                include=str(search.category.id) + "\\,.*",
                                size=25)
            elif level == 1:
                # Only values containing at least two commas.
                aggregation = A("terms",
                                field='category',
                                include=".*\\,.*\\,.*",
                                size=25)
            else:
                aggregation = A("terms", field='category', size=25)
        else:
            # No category selected: only values without any comma.
            aggregation = A("terms",
                            field='category',
                            exclude=".*\\,.*",
                            size=25)

        elastic_search.aggs.bucket('terms', aggregation)
        elastic_search.aggs.bucket('type', A("terms", field='type'))
        elastic_search.aggs.bucket('ages', A("terms", field='ages'))
        elastic_search.aggs.bucket('languages', A("terms", field='languages'))

        # KEEPING FOR NOW - THESE WERE THE ORIGINAL FACETS WE HAD SET UP.  WILL NEED TO CONVERT TO AGGREGATIONS
        # IF WE WANT TO KEEP ANY OF THESE.
        # 'Location': elasticsearch_dsl.TermsFacet(field='location'),
        # 'Type': elasticsearch_dsl.TermsFacet(field='label'),
        # 'Age Range': elasticsearch_dsl.TermsFacet(field='age_range'),
        # 'Category': elasticsearch_dsl.TermsFacet(field='category'),
        # 'Organization': elasticsearch_dsl.TermsFacet(field='organization'),
        # 'Status': elasticsearch_dsl.TermsFacet(field='status'),
        # 'Topic': elasticsearch_dsl.TermsFacet(field='topic'),

        return elastic_search.execute()