def prepare_index(self, courses):
        """
        Not a test.
        This method is doing the heavy lifting for the tests in this class:
        - prepare the Elasticsearch index,
        - bulk-insert the given courses.
        """
        self.create_filter_pages()
        # Index the provided courses in Elasticsearch
        indices_client = IndicesClient(client=ES_CLIENT)
        # Delete any existing indices so we get a clean slate
        indices_client.delete(index="_all")
        # Create an index we'll use to test the ES features
        indices_client.create(index="test_courses")
        indices_client.close(index="test_courses")
        indices_client.put_settings(body=ANALYSIS_SETTINGS,
                                    index="test_courses")
        indices_client.open(index="test_courses")

        # Use the default courses mapping from the Indexer
        indices_client.put_mapping(body=CoursesIndexer.mapping,
                                   doc_type="course",
                                   index="test_courses")
        # Add the sorting script
        ES_CLIENT.put_script(id="state", body=CoursesIndexer.scripts["state"])
        # Actually insert our courses in the index
        actions = [{
            "_id": course["id"],
            "_index": "test_courses",
            "_op_type": "create",
            "_type": "course",
            **course,
        } for course in courses]
        bulk(actions=actions, chunk_size=500, client=ES_CLIENT)
        indices_client.refresh()
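
A hedged usage sketch, assuming a test method in the same TestCase as prepare_index above; the course payloads and expected count are made up for illustration.

def test_index_is_populated(self):
    self.prepare_index([
        {"id": "101", "title": {"en": "Literature"}, "course_runs": []},
        {"id": "102", "title": {"en": "Algebra"}, "course_runs": []},
    ])
    # Both documents should be visible once the index has been refreshed
    self.assertEqual(ES_CLIENT.count(index="test_courses")["count"], 2)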
Example #2
def put_settings(obj):
    indices_client = IndicesClient(models.client)
    index_name = obj._meta.es_index_name
    indices_client.close(index=index_name)
    kwargs = {
        "analysis": {
            "analyzer": {
                "default": {
                    "tokenizer": "standard",
                    "filter": ["synonym"]
                },
                "keepwords": {
                    "tokenizer": "standard",
                    "filter": ["keepwords"]
                },
            },
            "filter": {
                "synonym": {
                    "type": "synonym",
                    "synonyms_path": "synonym.txt"
                },
                "keepwords": {
                    "type": "keep",
                    "keep_words_path": "keepwords.txt"
                },
            }
        }
    }
    indices_client.put_settings(index=index_name, body=kwargs)
    indices_client.open(index=index_name)
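
A hedged usage sketch for put_settings above: the only requirement on obj is a _meta.es_index_name attribute, so a stand-in object is enough to illustrate the call (the index name is made up; Django models get _meta from their Meta options).

from types import SimpleNamespace

# Any object exposing _meta.es_index_name will do here.
article_like = SimpleNamespace(_meta=SimpleNamespace(es_index_name="articles"))

# Closes the index, installs the synonym/keepwords analysis, and reopens it.
put_settings(article_like)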
Example #3
def perform_create_index(indexable, logger):
    """
    Create a new index in ElasticSearch from an indexable instance
    """
    indices_client = IndicesClient(client=ES_CLIENT)
    # Create a new index name, suffixing its name with a timestamp
    new_index = "{:s}_{:s}".format(
        indexable.index_name,
        timezone.now().strftime("%Y-%m-%d-%Hh%Mm%S.%fs"))

    # Create the new index
    logger.info(
        'Creating a new Elasticsearch index "{:s}"...'.format(new_index))
    indices_client.create(index=new_index)

    # The index needs to be closed before we set an analyzer
    indices_client.close(index=new_index)
    indices_client.put_settings(body=ANALYSIS_SETTINGS, index=new_index)
    indices_client.open(index=new_index)

    indices_client.put_mapping(body=indexable.mapping,
                               doc_type=indexable.document_type,
                               index=new_index)

    # Populate the new index with data provided from our indexable class
    richie_bulk(indexable.get_es_documents(new_index))

    # Return the name of the index we just created in ElasticSearch
    return new_index
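
Because perform_create_index returns a timestamped index name, a common follow-up (sketched here as an assumption, not part of the code above) is to point a stable alias at the new index so readers never query a half-populated one.

def point_alias_at(indexable, new_index):
    # Readers keep querying the alias while new timestamped indices are built behind it.
    indices_client = IndicesClient(client=ES_CLIENT)
    indices_client.update_aliases(body={
        "actions": [{"add": {"index": new_index, "alias": indexable.index_name}}]
    })
    # Removing the alias from older "<index_name>_<timestamp>" indices would be the next step.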
Example #4
    def execute_query(self, querystring=""):
        """
        Not a test.
        This method is doing the heavy lifting for the tests in this class: create and fill the
        index with our courses so we can run our queries and check our facet counts.
        It also executes the query and returns the result from the API.
        """
        # Create the subject category page. This is necessary to link the subjects we
        # defined above with the "subjects" filter
        # As it is the only page we create, we expect it to have the path "0001"
        CategoryFactory(page_reverse_id="subjects", should_publish=True)

        # Index these 4 courses in Elasticsearch
        indices_client = IndicesClient(client=ES_CLIENT)
        # Delete any existing indices so we get a clean slate
        indices_client.delete(index="_all")
        # Create an index we'll use to test the ES features
        indices_client.create(index="test_courses")
        indices_client.close(index="test_courses")
        indices_client.put_settings(body=ANALYSIS_SETTINGS, index="test_courses")
        indices_client.open(index="test_courses")

        # Use the default courses mapping from the Indexer
        indices_client.put_mapping(
            body=CoursesIndexer.mapping, doc_type="course", index="test_courses"
        )
        # Add the sorting script
        ES_CLIENT.put_script(id="state", body=CoursesIndexer.scripts["state"])
        # Actually insert our courses in the index
        actions = [
            {
                "_id": course["id"],
                "_index": "test_courses",
                "_op_type": "create",
                "_type": "course",
                "absolute_url": {"en": "url"},
                "cover_image": {"en": "image"},
                "title": {"en": "title"},
                **course,
                "course_runs": [
                    {
                        "languages": course_run["languages"],
                        "start": arrow.utcnow().datetime,
                        "end": arrow.utcnow().datetime,
                        "enrollment_start": arrow.utcnow().datetime,
                        "enrollment_end": arrow.utcnow().datetime,
                    }
                    for course_run in course["course_runs"]
                ],
            }
            for course in COURSES
        ]
        bulk(actions=actions, chunk_size=500, client=ES_CLIENT)
        indices_client.refresh()

        response = self.client.get(f"/api/v1.0/courses/?{querystring:s}")
        self.assertEqual(response.status_code, 200)

        return json.loads(response.content)
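
A hedged sketch of a test built on the helper above; the querystring is an assumption about the API, and only the decoded payload type is asserted since the helper already checked HTTP 200.

def test_courses_query_returns_payload(self):
    content = self.execute_query(querystring="limit=2")
    # Real tests would pin exact facet counts from the indexed COURSES fixture
    self.assertIsInstance(content, dict)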
    def execute_query(self, courses, querystring="", **extra):
        """
        Not a test.
        Prepare the ElasticSearch index and execute the query in it.
        """

        indices_client = IndicesClient(client=ES_CLIENT)
        # Delete any existing indices so we get a clean slate
        indices_client.delete(index="_all")
        # Create an index we'll use to test the ES features
        indices_client.create(index=COURSES_INDEX)

        # The index needs to be closed before we set an analyzer
        indices_client.close(index=COURSES_INDEX)
        indices_client.put_settings(body=ANALYSIS_SETTINGS,
                                    index=COURSES_INDEX)
        indices_client.open(index=COURSES_INDEX)

        # Use the default courses mapping from the Indexer
        indices_client.put_mapping(body=CoursesIndexer.mapping,
                                   doc_type="course",
                                   index=COURSES_INDEX)
        # Add the sorting script
        ES_CLIENT.put_script(id="score", body=CoursesIndexer.scripts["score"])
        ES_CLIENT.put_script(id="state_field",
                             body=CoursesIndexer.scripts["state_field"])

        # Actually insert our courses in the index
        actions = [{
            "_id": course["id"],
            "_index": COURSES_INDEX,
            "_op_type": "create",
            "_type": "course",
            "absolute_url": {
                "en": "en/url",
                "fr": "fr/url"
            },
            "categories": ["1", "2", "3"],
            "cover_image": {
                "en": "en/image",
                "fr": "fr/image"
            },
            "is_meta": False,
            "logo": {
                "en": "/en/some/img.png",
                "fr": "/fr/some/img.png"
            },
            "nb_children": 0,
            "organizations": ["11", "12", "13"],
            **course,
        } for course in courses]
        bulk(actions=actions, chunk_size=500, client=ES_CLIENT)
        indices_client.refresh()

        results = self.client.get(
            f"/api/v1.0/courses/autocomplete/?{querystring:s}", **extra)
        self.assertEqual(results.status_code, 200)

        return json.loads(results.content)
Example #6
    def execute_query(self, kind, querystring=""):
        """
        Not a test.
        This method is doing the heavy lifting for the tests in this class: create and fill the
        index with our categories so we can run our queries and check the results.
        It also executes the query and returns the result from the API.
        """
        # Index these categories in Elasticsearch
        indices_client = IndicesClient(client=ES_CLIENT)
        # Delete any existing indexes so we get a clean slate
        indices_client.delete(index="_all")
        # Create an index we'll use to test the ES features
        indices_client.create(index="test_categories")
        indices_client.close(index="test_categories")
        indices_client.put_settings(body=ANALYSIS_SETTINGS,
                                    index="test_categories")
        indices_client.open(index="test_categories")

        # Use the default categories mapping from the Indexer
        indices_client.put_mapping(body=CategoriesIndexer.mapping,
                                   doc_type="category",
                                   index="test_categories")

        # Actually insert our categories in the index
        actions = [{
            "_id": category["id"],
            "_index": "test_categories",
            "_op_type": "create",
            "_type": "category",
            "absolute_url": {
                "en": "en/url"
            },
            "description": {
                "en": "en/description"
            },
            "icon": {
                "en": "en/icon"
            },
            "is_meta": False,
            "logo": {
                "en": "en/logo"
            },
            "nb_children": 0,
            "path": category["id"],
            **category,
        } for category in CATEGORIES]
        bulk(actions=actions, chunk_size=500, client=ES_CLIENT)
        indices_client.refresh()

        response = self.client.get(f"/api/v1.0/{kind:s}/?{querystring:s}")
        self.assertEqual(response.status_code, 200)

        return json.loads(response.content)
Example #7
    def execute_query(self, querystring=""):
        """
        Not a test.
        This method is doing the heavy lifting for the tests in this class: create and fill the
        index with our organizations so we can run our queries and check the results.
        It also executes the query and returns the result from the API.
        """
        # Index these organizations in Elasticsearch
        indices_client = IndicesClient(client=ES_CLIENT)
        # Delete any existing indices so we get a clean slate
        indices_client.delete(index="_all")
        # Create an index we'll use to test the ES features
        indices_client.create(index="test_organizations")
        indices_client.close(index="test_organizations")
        indices_client.put_settings(body=ANALYSIS_SETTINGS,
                                    index="test_organizations")
        indices_client.open(index="test_organizations")

        # Use the default organizations mapping from the Indexer
        indices_client.put_mapping(
            body=OrganizationsIndexer.mapping,
            doc_type="organization",
            index="test_organizations",
        )

        # Actually insert our organizations in the index
        actions = [{
            "_id": organization["id"],
            "_index": "test_organizations",
            "_op_type": "create",
            "_type": "organization",
            "absolute_url": {
                "en": "en/url"
            },
            "description": {
                "en": "en/description"
            },
            "logo": {
                "en": "en/image"
            },
            **organization,
        } for organization in ORGANIZATIONS]
        bulk(actions=actions, chunk_size=500, client=ES_CLIENT)
        indices_client.refresh()

        response = self.client.get(f"/api/v1.0/organizations/?{querystring:s}")
        self.assertEqual(response.status_code, 200)

        return json.loads(response.content)
Example #8
def add_synonyms_to_index(ic: IndicesClient) -> None:
    """
    Modify the index settings, adding synonym mappings for "BC" => "British Columbia",
    "WA" => "Washington" and "AB" => "Alberta".

    Parameters
    ----------
    ic : IndicesClient
        The client used to control index settings in Elasticsearch
    
    Returns
    -------
    None
    """

    request_body = {
        "settings": {
            "analysis": {
                "analyzer": {
                    "my_analyzer": {
                        "type": "custom",
                        "tokenizer": "standard",
                        "filter": ["lowercase", "my_stops", "my_synonyms"]
                    }
                },
                "filter": {
                    "my_stops": {
                        "type": "stop",
                        "stopwords_path": "stopwords.txt"
                    },
                    "my_synonyms": {
                        "type":
                        "synonym",
                        "synonyms": [
                            "BC => British Columbia", "WA => Washington",
                            "AB => Alberta"
                        ]
                    }
                }
            }
        }
    }
    ic.close(index="wikipedia")
    ic.put_settings(index="wikipedia", body=request_body)
    ic.open(index="wikipedia")
def add_synonyms_to_index(ic: IndicesClient) -> None:
    """
    Modify the index settings, adding synonym mappings for "BC" => "British Columbia",
    "WA" => "Washington" and "AB" => "Alberta".

    Parameters
    ----------
    ic : IndicesClient
        The client used to control index settings in Elasticsearch
    
    Returns
    -------
    None
    """

    body = {
        "analysis": {
            "analyzer": {
                "my_analyzer": {
                    # A custom analyzer must (re)declare its tokenizer when its
                    # settings are updated, not just its filter chain.
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter": ["lowercase", "my_stops", "my_synonyms"]
                }
            },
            "filter": {
                "my_stops": {
                    "type": "stop",
                    "stopwords_path": "stopwords.txt"
                },
                "my_synonyms": {
                    # Equivalent synonyms: the abbreviation and the full name match each other
                    "type": "synonym",
                    "synonyms": ["British Columbia, BC", "Alberta, AB", "Washington, WA"]
                }
            }
        }
    }

    ic.close(index="wikipedia")
    ic.put_settings(body=body, index="wikipedia")

    ic.open(index="wikipedia")
Example #10
    def recreate_index(self):

        from elasticsearch.client import IndicesClient

        indices_client = IndicesClient(client=settings.ES_CLIENT)
        index_name = Archive._meta.es_index_name
        if indices_client.exists(index_name):
            indices_client.delete(index=index_name)
        indices_client.create(index=index_name)
        indices_client.close(index=index_name)
        indices_client.put_settings(
            index=index_name,
            body={
                "index": {
                    "max_result_window": settings.MAX_RESULT_WINDOW
                },
                "analysis": {
                    "filter": {
                        "edge_ngram_filter": {
                            "type": "edge_ngram",
                            "min_gram": 2,
                            "max_gram": 20
                        }
                    },
                    "analyzer": {
                        "edge_ngram_analyzer": {
                            "type": "custom",
                            "tokenizer": "standard",
                            "filter": ["lowercase", "edge_ngram_filter"]
                        }
                    }
                }
            })
        indices_client.put_mapping(doc_type=Archive._meta.es_type_name,
                                   body=Archive._meta.es_mapping,
                                   index=index_name)
        indices_client.open(index=index_name)
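
A hedged sketch of the kind of prefix query the edge_ngram analyzer above is meant to serve; the field name "title" and the query text are assumptions about Archive's mapping.

# Documents whose analyzed field starts with "arch" should match, because
# edge_ngram_filter indexed every 2-20 character prefix at index time.
results = settings.ES_CLIENT.search(
    index=Archive._meta.es_index_name,
    body={"query": {"match": {"title": "arch"}}},
)
print(results["hits"]["total"])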
Example #11
    def execute_query(self, querystring="", **extra):
        """
        Not a test.
        Prepare the ElasticSearch index and execute the query in it.
        """

        persons = [
            {
                "complete": {
                    "en": slice_string_for_completion("Éponine Thénardier")
                },
                "id": "25",
                "title": {
                    "en": "Éponine Thénardier"
                },
            },
            {
                "complete": {
                    "en":
                    slice_string_for_completion("Monseigneur Bienvenu Myriel")
                },
                "id": "34",
                "title": {
                    "en": "Monseigneur Bienvenu Myriel"
                },
            },
            {
                "complete": {
                    "en": slice_string_for_completion("Fantine")
                },
                "id": "52",
                "title": {
                    "en": "Fantine"
                },
            },
        ]

        indices_client = IndicesClient(client=ES_CLIENT)
        # Delete any existing indexes so we get a clean slate
        indices_client.delete(index="_all")
        # Create an index we'll use to test the ES features
        indices_client.create(index=PERSONS_INDEX)

        # The index needs to be closed before we set an analyzer
        indices_client.close(index=PERSONS_INDEX)
        indices_client.put_settings(body=ANALYSIS_SETTINGS,
                                    index=PERSONS_INDEX)
        indices_client.open(index=PERSONS_INDEX)

        # Use the default persons mapping from the Indexer
        indices_client.put_mapping(body=PersonsIndexer.mapping,
                                   doc_type="person",
                                   index=PERSONS_INDEX)

        # Actually insert our persons in the index
        actions = [{
            "_id": person["id"],
            "_index": PERSONS_INDEX,
            "_op_type": "create",
            "_type": "person",
            "absolute_url": {
                "en": "url"
            },
            "logo": {
                "en": "/some/img.png"
            },
            **person,
        } for person in persons]
        bulk(actions=actions, chunk_size=500, client=ES_CLIENT)
        indices_client.refresh()

        response = self.client.get(
            f"/api/v1.0/persons/autocomplete/?{querystring:s}", **extra)
        self.assertEqual(response.status_code, 200)

        return persons, json.loads(response.content)
Example #12
    def execute_query(self, querystring="", **extra):
        """
        Not a test.
        Prepare the ElasticSearch index and execute the query in it.
        """

        courses = [
            {
                "complete": {
                    "en":
                    slice_string_for_completion(
                        "Artificial intelligence for mushroom picking"),
                    "fr":
                    slice_string_for_completion(
                        "Intelligence artificielle pour la cueillette de chàmpiñons"
                    ),
                },
                "course_runs": [],
                "id": "24",
                "path": "001000",
                "title": {
                    "en":
                    "Artificial intelligence for mushroom picking",
                    "fr":
                    "Intelligence artificielle pour la cueillette de chàmpiñons",
                },
            },
            {
                "complete": {
                    "en":
                    slice_string_for_completion(
                        "Kung-fu moves for cloud infrastructure security"),
                    "fr":
                    slice_string_for_completion(
                        "Protéger ses serveurs par la pratique des arts martiaux"
                    ),
                },
                "course_runs": [],
                "id": "33",
                "path": "001001",
                "title": {
                    "en":
                    "Kung-fu moves for cloud infrastructure security",
                    "fr":
                    "Prôtéger ses serveurs par la pratique des arts martiaux",
                },
            },
            {
                "complete": {
                    "en":
                    slice_string_for_completion(
                        "Securing funding through token sales"),
                    "fr":
                    slice_string_for_completion("Lever des fonds par des ICO"),
                },
                "course_runs": [],
                "id": "51",
                "path": "001002",
                "title": {
                    "en": "Securing funding through token sales",
                    "fr": "Lever des fonds par des ICO",
                },
            },
        ]

        indices_client = IndicesClient(client=ES_CLIENT)
        # Delete any existing indices so we get a clean slate
        indices_client.delete(index="_all")
        # Create an index we'll use to test the ES features
        indices_client.create(index=COURSES_INDEX)

        # The index needs to be closed before we set an analyzer
        indices_client.close(index=COURSES_INDEX)
        indices_client.put_settings(body=ANALYSIS_SETTINGS,
                                    index=COURSES_INDEX)
        indices_client.open(index=COURSES_INDEX)

        # Use the default courses mapping from the Indexer
        indices_client.put_mapping(body=CoursesIndexer.mapping,
                                   doc_type="course",
                                   index=COURSES_INDEX)
        # Add the sorting script
        ES_CLIENT.put_script(id="state", body=CoursesIndexer.scripts["state"])
        # Actually insert our courses in the index
        actions = [{
            "_id": course["id"],
            "_index": COURSES_INDEX,
            "_op_type": "create",
            "_type": "course",
            "absolute_url": {
                "en": "en/url",
                "fr": "fr/url"
            },
            "categories": ["1", "2", "3"],
            "cover_image": {
                "en": "en/image",
                "fr": "fr/image"
            },
            "is_meta": False,
            "logo": {
                "en": "/en/some/img.png",
                "fr": "/fr/some/img.png"
            },
            "nb_children": 0,
            "organizations": ["11", "12", "13"],
            **course,
        } for course in courses]
        bulk(actions=actions, chunk_size=500, client=ES_CLIENT)
        indices_client.refresh()

        response = self.client.get(
            f"/api/v1.0/courses/autocomplete/?{querystring:s}", **extra)
        self.assertEqual(response.status_code, 200)

        return courses, json.loads(response.content)
Example #13
def add_synonyms_to_index(ic: IndicesClient) -> None:
    """
    Modify the index settings, adding synonym mappings for "BC" => "British Columbia",
    "WA" => "Washington" and "AB" => "Alberta".

    Parameters
    ----------
    ic : IndicesClient
        The client used to control index settings in Elasticsearch

    Returns
    -------
    None
    """

    # Fill in the code
    index_name = 'wikipedia'

    with open('./stopwords.txt') as f:
        # with open('/usr/share/elasticsearch/config/stopwords.txt') as f:
        content = f.readlines()
    content = [x.strip() for x in content]
    ic.close(index='wikipedia')
    ic.put_settings(
        index=index_name,
        body={
            'settings': {
                'analysis': {
                    'analyzer': {
                        'my_analyzer': {
                            'type': 'custom',
                            'tokenizer': 'standard',
                            'filter': ['lowercase', 'my_stops', 'my_synonyms']
                        },
                    },
                    'filter': {
                        'my_stops': {
                            'type': 'stop',
                            'stopwords_path': 'stopwords.txt'
                            # 'stopwords': content,
                        },
                        "my_synonyms": {
                            "type": "synonym",
                            "synonyms": [
                                "BC => British Columbia", "AB => Alberta",
                                "WA => Washington"
                            ]
                        }
                    },
                },
            },
        },
    )
    ic.open(index='wikipedia')
    # Mappings are not applied through the _settings endpoint, so re-apply the
    # analyzer to the fields with put_mapping (a no-op if they already use it).
    ic.put_mapping(
        index=index_name,
        body={
            "properties": {
                "title": {
                    "type": "text",
                    "analyzer": "my_analyzer"
                },
                "body": {
                    "type": "text",
                    "analyzer": "my_analyzer"
                }
            }
        },
    )
Example #14
def create_wikipedia_index(ic: IndicesClient) -> None:
    """
    Add an index to Elasticsearch called 'wikipedia'

    Parameters
    ----------
    ic : IndicesClient
        The client to control Elasticsearch index settings

    Returns
    -------
    None
    """
    # Fill in the code here

    index_name = 'wikipedia'

    # with open('./stopwords.txt') as f:
    #     # with open('/usr/share/elasticsearch/config/stopwords.txt') as f:
    #     content = f.readlines()
    # content = [x.strip() for x in content]
    if ic.exists(index_name):
        ic.close(index='wikipedia')
        # Analysis settings can only be changed while the index is closed.
        ic.put_settings(
            index=index_name,
            body={
                'settings': {
                    'analysis': {
                        'analyzer': {
                            'my_analyzer': {
                                'type': 'custom',
                                'tokenizer': 'standard',
                                'filter': ['lowercase', 'my_stops', ]
                            },
                        },
                        'filter': {
                            'my_stops': {
                                'type': 'stop',
                                # 'stopwords': content,
                                'stopwords_path': 'stopwords.txt',
                            },
                        },
                    },
                },
            },
        )
        ic.open(index='wikipedia')
        # Mappings are not applied through the _settings endpoint; re-apply them
        # with put_mapping (a no-op if the fields already use my_analyzer).
        ic.put_mapping(
            index=index_name,
            body={
                "properties": {
                    "title": {
                        "type": "text",
                        "analyzer": "my_analyzer",
                    },
                    "body": {
                        "type": "text",
                        "analyzer": "my_analyzer",
                    }
                }
            },
        )

    else:
        ic.create(
            index=index_name,
            body={
                'settings': {
                    'analysis': {
                        'analyzer': {
                            'my_analyzer': {
                                'type': 'custom',
                                'tokenizer': 'standard',
                                'filter': ['lowercase', 'my_stops', ]
                            },
                        },
                        'filter': {
                            'my_stops': {
                                'type': 'stop',
                                # 'stopwords': content,
                                'stopwords_path': 'stopwords.txt',
                            },
                        },
                    },
                },
                "mappings": {
                        "properties": {
                            "title": {
                                "type": "text",
                                "analyzer": "my_analyzer",
                            },
                            "body": {
                                "type": "text",
                                "analyzer": "my_analyzer",
                            }
                        }
                }

            },
        )
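
A hedged driver for create_wikipedia_index; the cluster address and the sample document are illustrative, a typeless (7.x-style) client is assumed to match the mapping format above, and stopwords.txt must exist in the Elasticsearch config directory.

from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient

es = Elasticsearch("http://localhost:9200")
create_wikipedia_index(IndicesClient(es))

# Index one document; both title and body go through my_analyzer.
es.index(index="wikipedia", body={"title": "Vancouver", "body": "A city on the west coast."})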
Example #15
    def execute_query(self, querystring="", **extra):
        """
        Not a test.
        Prepare the ElasticSearch index and execute the query in it.
        """

        organizations = [
            {
                "complete": {
                    "en":
                    slice_string_for_completion("University of Paris 18"),
                    "fr":
                    slice_string_for_completion("Université de Paris 18"),
                },
                "id": "25",
                "path": "000000",
                "title": {
                    "en": "University of Paris 18",
                    "fr": "Université de Paris 18",
                },
            },
            {
                "complete": {
                    "en":
                    slice_string_for_completion("School of bikeshedding"),
                    "fr": slice_string_for_completion("École d'abri-vélo"),
                },
                "id": "34",
                "path": "000001",
                "title": {
                    "en": "School of bikeshedding",
                    "fr": "École d'abri-vélo"
                },
            },
            {
                "complete": {
                    "en":
                    slice_string_for_completion("University of Paris 19"),
                    "fr":
                    slice_string_for_completion("Université de Paris 19"),
                },
                "id": "52",
                "path": "000002",
                "title": {
                    "en": "University of Paris 19",
                    "fr": "Université de Paris 19",
                },
            },
        ]

        indices_client = IndicesClient(client=ES_CLIENT)
        # Delete any existing indices so we get a clean slate
        indices_client.delete(index="_all")
        # Create an index we'll use to test the ES features
        indices_client.create(index=ORGANIZATIONS_INDEX)

        # The index needs to be closed before we set an analyzer
        indices_client.close(index=ORGANIZATIONS_INDEX)
        indices_client.put_settings(body=ANALYSIS_SETTINGS,
                                    index=ORGANIZATIONS_INDEX)
        indices_client.open(index=ORGANIZATIONS_INDEX)

        # Use the default organizations mapping from the Indexer
        indices_client.put_mapping(
            body=OrganizationsIndexer.mapping,
            doc_type="organization",
            index=ORGANIZATIONS_INDEX,
        )
        # Actually insert our organizations in the index
        actions = [{
            "_id": organization["id"],
            "_index": ORGANIZATIONS_INDEX,
            "_op_type": "create",
            "_type": "organization",
            "absolute_url": {
                "en": "url"
            },
            "cover_image": {
                "en": "image"
            },
            "is_meta": False,
            "logo": {
                "en": "/some/img.png"
            },
            "nb_children": 0,
            **organization,
        } for organization in organizations]
        bulk(actions=actions, chunk_size=500, client=ES_CLIENT)
        indices_client.refresh()

        response = self.client.get(
            f"/api/v1.0/organizations/autocomplete/?{querystring:s}", **extra)
        self.assertEqual(response.status_code, 200)

        return organizations, json.loads(response.content)
Example #16
class PartialMappingsTestCase(TestCase):
    """
    Make sure our mappings (esp. dynamic templates) are correctly understood by ElasticSearch
    """

    def setUp(self):
        """
        Instantiate our ES client and make sure all indexes are deleted before each test
        """
        super().setUp()
        self.indices_client = IndicesClient(client=ES_CLIENT)
        self.indices_client.delete(index="_all")

    def test_partial_mappings_multilingual_text(self):
        """
        Make sure our multilingual_text dynamic mapping results in the proper mappings being
        generated when objects with the expected format are indexed
        """
        document_type = "stub"
        index_name = "stub_index"
        mapping = {"dynamic_templates": MULTILINGUAL_TEXT}

        # Create the index and set a mapping that includes the pattern we want to test
        self.indices_client.create(index=index_name)
        self.indices_client.put_mapping(
            index=index_name, doc_type=document_type, body=mapping
        )
        # The index needs to be closed before we set an analyzer
        self.indices_client.close(index=index_name)
        self.indices_client.put_settings(body=ANALYSIS_SETTINGS, index=index_name)
        self.indices_client.open(index=index_name)

        # The stub mapping only contains our dynamic template
        mapping = self.indices_client.get_mapping(
            index=index_name, doc_type=document_type
        )
        self.assertEqual(
            mapping[index_name]["mappings"][document_type],
            {"dynamic_templates": MULTILINGUAL_TEXT},
        )

        # Index an object that should trigger a match for our dynamic template
        ES_CLIENT.index(
            index=index_name,
            doc_type=document_type,
            body={"title": {"fr": "Un titre en français à titre d'exemple"}},
        )

        # The stub mapping has been extended with a matching property for 'fr'
        mapping = self.indices_client.get_mapping(
            index=index_name, doc_type=document_type
        )
        self.assertEqual(
            mapping[index_name]["mappings"][document_type],
            {
                "dynamic_templates": MULTILINGUAL_TEXT,
                "properties": {
                    "title": {
                        "properties": {
                            "fr": {
                                "type": "text",
                                "fields": {
                                    "language": {"type": "text", "analyzer": "french"},
                                    "trigram": {
                                        "type": "text",
                                        "analyzer": "french_trigram",
                                        "search_analyzer": "french",
                                    },
                                },
                            }
                        }
                    }
                },
            },
        )

        # Index an object that should trigger a different match for our dynamic template
        ES_CLIENT.index(
            index=index_name,
            doc_type=document_type,
            body={"title": {"en": "An English title as an example"}},
        )

        # The stub mapping has been extended with a matching property for 'en'
        mapping = self.indices_client.get_mapping(
            index=index_name, doc_type=document_type
        )
        self.assertEqual(
            mapping[index_name]["mappings"][document_type],
            {
                "dynamic_templates": MULTILINGUAL_TEXT,
                "properties": {
                    "title": {
                        "properties": {
                            "en": {
                                "type": "text",
                                "fields": {
                                    "language": {"type": "text", "analyzer": "english"},
                                    "trigram": {
                                        "type": "text",
                                        "analyzer": "english_trigram",
                                        "search_analyzer": "english",
                                    },
                                },
                            },
                            "fr": {
                                "type": "text",
                                "fields": {
                                    "language": {"type": "text", "analyzer": "french"},
                                    "trigram": {
                                        "type": "text",
                                        "analyzer": "french_trigram",
                                        "search_analyzer": "french",
                                    },
                                },
                            },
                        }
                    }
                },
            },
        )
    def execute_query(self, querystring="", **extra):
        """
        Not a test.
        Prepare the ElasticSearch index and execute the query in it.
        """

        categories = [
            {
                "complete": {
                    "en":
                    slice_string_for_completion("Electric Birdwatching"),
                    "fr":
                    slice_string_for_completion(
                        "Observation des oiseaux électriques"),
                },
                "id": "24",
                "kind": "subjects",
                "path": "001000",
                "title": {
                    "en": "Electric Birdwatching",
                    "fr": "Observation des oiseaux électriques",
                },
            },
            {
                "complete": {
                    "en": slice_string_for_completion("Ocean biking"),
                    "fr": slice_string_for_completion("Cyclisme océanique"),
                },
                "id": "33",
                "kind": "subjects",
                "path": "001001",
                "title": {
                    "en": "Ocean biking",
                    "fr": "Cyclisme océanique"
                },
            },
            {
                "complete": {
                    "en": slice_string_for_completion("Elegiac bikeshedding"),
                    "fr":
                    slice_string_for_completion("Élégie de l'abri à vélos"),
                },
                "id": "51",
                "kind": "subjects",
                "path": "001002",
                "title": {
                    "en": "Elegiac bikeshedding",
                    "fr": "Élégie de l'abri à vélos",
                },
            },
            {
                "complete": {
                    "en": slice_string_for_completion("Electric Decoys"),
                    "fr": slice_string_for_completion("Leurres électriques"),
                },
                "id": "44",
                "kind": "not_subjects",
                "path": "001003",
                "title": {
                    "en": "Electric Decoys",
                    "fr": "Leurres électriques"
                },
            },
        ]

        indices_client = IndicesClient(client=ES_CLIENT)
        # Delete any existing indexes so we get a clean slate
        indices_client.delete(index="_all")
        # Create an index we'll use to test the ES features
        indices_client.create(index=CATEGORIES_INDEX)

        # The index needs to be closed before we set an analyzer
        indices_client.close(index=CATEGORIES_INDEX)
        indices_client.put_settings(body=ANALYSIS_SETTINGS,
                                    index=CATEGORIES_INDEX)
        indices_client.open(index=CATEGORIES_INDEX)

        # Use the default categories mapping from the Indexer
        indices_client.put_mapping(body=CategoriesIndexer.mapping,
                                   doc_type="category",
                                   index=CATEGORIES_INDEX)

        # Actually insert our categories in the index
        actions = [{
            "_id": category["id"],
            "_index": CATEGORIES_INDEX,
            "_op_type": "create",
            "_type": "category",
            "absolute_url": {
                "en": "en/url",
                "fr": "fr/url"
            },
            "cover_image": {
                "en": "en/image",
                "fr": "fr/image"
            },
            "is_meta": False,
            "logo": {
                "en": "en/some/img.png",
                "fr": "fr/some/img.png"
            },
            "nb_children": 0,
            **category,
        } for category in categories]
        bulk(actions=actions, chunk_size=500, client=ES_CLIENT)
        indices_client.refresh()

        response = self.client.get(
            f"/api/v1.0/subjects/autocomplete/?{querystring:s}", **extra)
        self.assertEqual(response.status_code, 200)

        return categories, json.loads(response.content)
Example #18
from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient
from elasticsearch_dsl import Mapping, String, Search

es = Elasticsearch()
ies = IndicesClient(es)

# Drop and recreate the test index (ignore 404 if it does not exist yet), then
# close it so the default analyzer can be installed.
ies.delete('test', ignore=404)
ies.create('test')
ies.close('test')

ies.put_settings(index='test', body={
    "analysis":{
      "analyzer":{
        "default":{
          "type":"custom",
          "tokenizer":"standard",
          "filter":[ "standard", "lowercase", "stop", "kstem" ]
        }
      }
    }
})



# Define a minimal mapping with a single analyzed string field and save it into
# the index (older elasticsearch_dsl API; String was later replaced by Text).
m = Mapping('test')
m.field('f', String())
m.save(index='test', using=es)

ies.open(index='test')
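
A short hedged follow-up to the snippet above, reusing its es/ies objects and the same legacy APIs to show the default analyzer (stop + kstem) at work; the document text is made up.

# "running" is stemmed to "run" by kstem, so a query for "run" matches.
es.index(index='test', doc_type='test', body={'f': 'The dogs are running'})
ies.refresh(index='test')

s = Search(using=es, index='test').query('match', f='run')
print(s.execute().hits.total)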
Example #19
from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient

# `es` must expose get_alias()/close(), i.e. it is an IndicesClient; the commented
# line below hints at the original cluster address.
#deles = ElasticSearch('http://127.0.0.1:9201', timeout=15)
es = IndicesClient(Elasticsearch('http://127.0.0.1:9201', timeout=15))

if __name__ == '__main__':
    # Collect every index name (newest first), then group them by prefix so only
    # a few recent indices per family are kept open below.
    indexs = []
    default = {}
    #indexs = ["logstash-2017.11.08", "vod_access-2017.11.12", "logstash-2017.11.09", "logstash-2017.11.10", "vod_access-2017.11.11"]
    for hit in sorted(es.get_alias(expand_wildcards='all').keys(),
                      reverse=True):
        indexs.append(hit)
    for i in [x.split('-') for x in indexs]:
        #print i
        default[i[0]] = [
            '-'.join(y) for y in [x.split('-') for x in indexs] if y[0] == i[0]
        ]

    for key, value in default.items():

        if key == 'logstash':

            for j in (value[10:]):
                es.close(j)

        elif key == 'kibana':

            for l in (value[5:]):
                es.close(l)
        else:

            for k in (value[2:]):
                es.close(k)
Example #20
def get_client_and_close_indices(es, indices):
    """Close the given indices and return the IndicesClient that was used."""
    indices_client = IndicesClient(es)
    indices_client.close(index=indices)
    return indices_client
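
A hedged usage sketch for get_client_and_close_indices; the cluster address and index names are illustrative.

from elasticsearch import Elasticsearch

client = get_client_and_close_indices(Elasticsearch("http://localhost:9200"),
                                       "logstash-2017.11.08,logstash-2017.11.09")
# The returned IndicesClient can be reused, e.g. to reopen the indices later.
client.open(index="logstash-2017.11.08,logstash-2017.11.09")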
Example #21
    def execute_query(self, querystring="", suite=None):
        """
        Not a test.
        This method is doing the heavy lifting for the tests in this class:
        - generate a set of courses randomly associated to our "interesting" course runs,
        - prepare the Elasticsearch index,
        - execute the query.
        """
        # Shuffle our course runs to assign them randomly to 4 courses
        # For example: ["H", "D", "C", "F", "B", "A", "G", "E"]
        suite = suite or random.sample(list(COURSE_RUNS), len(COURSE_RUNS))

        # Assume 4 courses and associate 2 course runs to each course
        # > [[3, ["H", "D"]], [0, ["C", "F"]], [1, ["B", "A"]], [2, ["G", "E"]]]
        courses_definition = [[i, suite[2 * i : 2 * i + 2]] for i in range(4)]  # noqa

        # Index these 4 courses in Elasticsearch
        indices_client = IndicesClient(client=ES_CLIENT)
        # Delete any existing indices so we get a clean slate
        indices_client.delete(index="_all")
        # Create an index we'll use to test the ES features
        indices_client.create(index="test_courses")
        indices_client.close(index="test_courses")
        indices_client.put_settings(body=ANALYSIS_SETTINGS, index="test_courses")
        indices_client.open(index="test_courses")

        # Use the default courses mapping from the Indexer
        indices_client.put_mapping(
            body=CoursesIndexer.mapping, doc_type="course", index="test_courses"
        )
        # Add the sorting script
        ES_CLIENT.put_script(id="state", body=CoursesIndexer.scripts["state"])
        # Actually insert our courses in the index
        now = arrow.utcnow()
        actions = [
            {
                "_id": course_id,
                "_index": "test_courses",
                "_op_type": "create",
                "_type": "course",
                # The sorting algorithm assumes that course runs are sorted by decreasing
                # end date, which limits the number of iterations needed for courses with
                # a lot of archived course runs.
                "absolute_url": {"en": "url"},
                "cover_image": {"en": "cover_image.jpg"},
                "duration": {"en": "N/A"},
                "effort": {"en": "N/A"},
                "icon": {"en": "icon.jpg"},
                "title": {"en": "title"},
                **COURSES[course_id],
                "course_runs": sorted(
                    [
                        # Each course randomly gets 2 course runs (thanks to above shuffle)
                        COURSE_RUNS[course_run_id]
                        for course_run_id in course_run_ids
                    ],
                    key=lambda o: now - o["end"],
                ),
            }
            for course_id, course_run_ids in courses_definition
        ]
        bulk(actions=actions, chunk_size=500, client=ES_CLIENT)
        indices_client.refresh()

        response = self.client.get(f"/api/v1.0/courses/?{querystring:s}")
        self.assertEqual(response.status_code, 200)

        return courses_definition, json.loads(response.content)
Example #23
File: s3_restore.py  Project: bkj/es-backup
import argparse
import json

from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient, SnapshotClient

# --
# CLI arguments

parser = argparse.ArgumentParser(description='Restore indices from an Elasticsearch snapshot')
parser.add_argument('--indices',  dest = 'indices', action = 'store', required = True)
parser.add_argument('--snapshot', dest = 'snapshot', action = 'store', required = True)
args = parser.parse_args()

# --
# Config

config         = json.load(open('config.json'))
config_private = json.load(open('config-private.json'))

# --
# Run

client = Elasticsearch([{'host' : config['ES_HOST'], 'port' : config['ES_PORT']}], timeout = 20)
sc     = SnapshotClient(client)
ic     = IndicesClient(client)

try:
    ic.close(index = args.indices)
except Exception:
    # Closing is best-effort: the index may not exist before the first restore.
    print('! could not close index')

_ = sc.restore(
    repository          = config['REPO_NAME'],
    snapshot            = args.snapshot,
    body                = {"indices" : args.indices},
    wait_for_completion = True
)

ic.open(index = args.indices)
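
A hedged follow-up to the restore script above: waiting for the restored indices to reach at least yellow health before serving queries (the timeout is illustrative).

# Block until the restored indices are usable.
health = client.cluster.health(index=args.indices, wait_for_status='yellow', request_timeout=120)
print('restore health:', health['status'])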