    def prepare_index(self, courses):
        """
        Not a test.
        This method does the heavy lifting for the tests in this class:
        - prepare the Elasticsearch index,
        - insert the given courses so the tests can run queries against them.
        """
        self.create_filter_pages()
        # Index the given courses in Elasticsearch
        indices_client = IndicesClient(client=ES_CLIENT)
        # Delete any existing indices so we get a clean slate
        indices_client.delete(index="_all")
        # Create an index we'll use to test the ES features
        indices_client.create(index="test_courses")
        indices_client.close(index="test_courses")
        indices_client.put_settings(body=ANALYSIS_SETTINGS,
                                    index="test_courses")
        indices_client.open(index="test_courses")

        # Use the default courses mapping from the Indexer
        indices_client.put_mapping(body=CoursesIndexer.mapping,
                                   doc_type="course",
                                   index="test_courses")
        # Add the sorting script
        ES_CLIENT.put_script(id="state", body=CoursesIndexer.scripts["state"])
        # Actually insert our courses in the index
        actions = [{
            "_id": course["id"],
            "_index": "test_courses",
            "_op_type": "create",
            "_type": "course",
            **course,
        } for course in courses]
        bulk(actions=actions, chunk_size=500, client=ES_CLIENT)
        indices_client.refresh()
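A note on the close/put_settings/open sequence above: analysis settings are static index settings, so Elasticsearch requires the index to be closed before they can be changed. A minimal reusable sketch of that pattern (the helper name is illustrative, not from the original project):

from elasticsearch.client import IndicesClient

def apply_analysis_settings(indices_client, index, settings):
    """Close the index, apply static analysis settings, then reopen it."""
    indices_client.close(index=index)
    try:
        indices_client.put_settings(body=settings, index=index)
    finally:
        # Reopen even if put_settings fails, so the index is not left closed.
        indices_client.open(index=index)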
Example #2
def import_examples_into_es(examples: list):
    index_name = config.index_name
    type_name = config.type_name
    buck_size = config.buck_size

    es = Elasticsearch(config.es_url)
    es_index = IndicesClient(es)
    if es_index.exists(index=index_name):
        es_index.delete(index=index_name)
    # Create the index
    with open(config.es_index_json) as f:
        mappings = json.load(f)

    res = es.indices.create(index=index_name, body=mappings)

    # Bulk-import the data into ES
    for i in range(len(examples)):
        examples[i] = {
            "_index": index_name,
            "_type": type_name,
            "_id": examples[i]["ntc_id"],
            "_source": examples[i]
        }

    for i in tqdm(range(ceil(len(examples) / buck_size)), desc="Import into ES"):
        bulk(es, actions=examples[i * buck_size: min((i + 1) * buck_size, len(examples))])
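As an aside, the manual slicing into buck_size chunks above can be delegated to the helper itself: elasticsearch.helpers.streaming_bulk chunks the action stream on its own. A hedged sketch, assuming the same es client and action dicts as in the example:

from elasticsearch.helpers import streaming_bulk

def import_with_streaming_bulk(es, actions, chunk_size=500):
    # streaming_bulk yields one (ok, item) tuple per action and handles
    # chunking internally, so no manual bucketing is needed.
    ok_count = 0
    for ok, _item in streaming_bulk(es, actions, chunk_size=chunk_size):
        ok_count += int(ok)
    return ok_count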
class TestSingleDocSigTerms(TestCase):
    def setUp(self):
        super(TestSingleDocSigTerms, self).setUp()

        self.es = Elasticsearch(
            hosts=['localhost:%d' % es_runner.es_state.port])
        self.ic = IndicesClient(self.es)
        self.index = 'single_doc_sigterms_test'
        self.doc_type = 'test-doc'
        self.field = 'text'

        if self.ic.exists(self.index):
            self.ic.delete(self.index)

        self.ic.create(self.index)
        self.es.create(self.index,
                       self.doc_type,
                       {self.field: 'foo ba knark foo knirk knark foo'},
                       id='doc_1')

    def test_tf_for_doc_id(self):
        sigterms = SingleDocSigTerms(self.es, self.index, self.doc_type,
                                     self.field, None)

        resp = dict(sigterms.tf_for_doc_id('doc_1'))
        self.assertEqual(4, len(resp))
        self.assertEqual(3, resp['foo'])
        self.assertEqual(2, resp['knark'])
        self.assertEqual(1, resp['ba'])
        self.assertEqual(1, resp['knirk'])
Example #4
def create_index_survey():
    indices_client = IndicesClient(models.client)
    index_name = models.SurveyMap._meta.es_index_name
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
    #put_settings(models.ScentemotionMap)
    # add qstfld fields
    es_mapping = models.SurveyMap._meta.es_mapping
    for qst, mapping in survey.qst2fld.items():
        fields = mapping[0]
        field_type = mapping[1]
        if field_type == 'nested_qst_ans':
            for field in fields:
                if field not in es_mapping['properties']:
                    es_mapping['properties'][field] = {
                        'type': 'nested',
                        'properties': {
                            'question': {
                                'type': 'text',
                                'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}},
                            },
                            'answer': {
                                'type': 'text',
                                'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}},
                            },
                        },
                    }
    indices_client.put_mapping(
        doc_type=models.SurveyMap._meta.es_type_name,
        #body=models.SurveyMap._meta.es_mapping,
        body=es_mapping,
        index=index_name
        )
Example #5
    def reindex(self):

        elastic_client = Elasticsearch([{
            "host": self.__host,
            "port": self.__port
        }])
        index_client = IndicesClient(elastic_client)

        # Create a new index with the necessary field mappings
        # (optionally: master_timeout=10, timeout=10)
        index_client.create(index=self.__target_index, body=self.__body)

        # Reindex data from the source index into the target index
        helpers.reindex(client=elastic_client,
                        source_index=self.__source_index,
                        target_index=self.__target_index)

        # Create an alias for the target index
        alias = {'actions': []}
        # remove_action = {"remove": {"index": self.__source_index, "alias": self.__alias}}
        add_action = {
            "add": {
                "index": self.__target_index,
                "alias": self.__alias
            }
        }
        # alias['actions'].append(remove_action)
        alias['actions'].append(add_action)

        # Delete the source index
        index_client.delete(index=self.__source_index)
        index_client.update_aliases(body=alias)
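One caveat worth flagging in the method above: the source index is deleted before the alias is updated, which leaves a brief window in which the alias points at nothing. A reordering sketch (the same two calls, swapped):

        # Point the alias at the new index first, then drop the old index,
        # so readers never observe a missing alias.
        index_client.update_aliases(body=alias)
        index_client.delete(index=self.__source_index)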
def create_index_conf():
    indices_client = IndicesClient(models.client)
    index_name = 'conf'
    doc_type = index_name
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
Example #8
    def execute_query(self, querystring=""):
        """
        Not a test.
        This method is doing the heavy lifting for the tests in this class: create and fill the
        index with our courses so we can run our queries and check our facet counts.
        It also executes the query and returns the result from the API.
        """
        # Create the subject category page. This is necessary to link the subjects we
        # defined above with the "subjects" filter
        # As it is the only page we create, we expect it to have the path "0001"
        CategoryFactory(page_reverse_id="subjects", should_publish=True)

        # Index these 4 courses in Elasticsearch
        indices_client = IndicesClient(client=ES_CLIENT)
        # Delete any existing indices so we get a clean slate
        indices_client.delete(index="_all")
        # Create an index we'll use to test the ES features
        indices_client.create(index="test_courses")
        indices_client.close(index="test_courses")
        indices_client.put_settings(body=ANALYSIS_SETTINGS, index="test_courses")
        indices_client.open(index="test_courses")

        # Use the default courses mapping from the Indexer
        indices_client.put_mapping(
            body=CoursesIndexer.mapping, doc_type="course", index="test_courses"
        )
        # Add the sorting script
        ES_CLIENT.put_script(id="state", body=CoursesIndexer.scripts["state"])
        # Actually insert our courses in the index
        actions = [
            {
                "_id": course["id"],
                "_index": "test_courses",
                "_op_type": "create",
                "_type": "course",
                "absolute_url": {"en": "url"},
                "cover_image": {"en": "image"},
                "title": {"en": "title"},
                **course,
                "course_runs": [
                    {
                        "languages": course_run["languages"],
                        "start": arrow.utcnow().datetime,
                        "end": arrow.utcnow().datetime,
                        "enrollment_start": arrow.utcnow().datetime,
                        "enrollment_end": arrow.utcnow().datetime,
                    }
                    for course_run in course["course_runs"]
                ],
            }
            for course in COURSES
        ]
        bulk(actions=actions, chunk_size=500, client=ES_CLIENT)
        indices_client.refresh()

        response = self.client.get(f"/api/v1.0/courses/?{querystring:s}")
        self.assertEqual(response.status_code, 200)

        return json.loads(response.content)
    def execute_query(self, courses, querystring="", **extra):
        """
        Not a test.
        Prepare the Elasticsearch index and execute the query against it.
        """

        indices_client = IndicesClient(client=ES_CLIENT)
        # Delete any existing indices so we get a clean slate
        indices_client.delete(index="_all")
        # Create an index we'll use to test the ES features
        indices_client.create(index=COURSES_INDEX)

        # The index needs to be closed before we set an analyzer
        indices_client.close(index=COURSES_INDEX)
        indices_client.put_settings(body=ANALYSIS_SETTINGS,
                                    index=COURSES_INDEX)
        indices_client.open(index=COURSES_INDEX)

        # Use the default courses mapping from the Indexer
        indices_client.put_mapping(body=CoursesIndexer.mapping,
                                   doc_type="course",
                                   index=COURSES_INDEX)
        # Add the sorting script
        ES_CLIENT.put_script(id="score", body=CoursesIndexer.scripts["score"])
        ES_CLIENT.put_script(id="state_field",
                             body=CoursesIndexer.scripts["state_field"])

        # Actually insert our courses in the index
        actions = [{
            "_id": course["id"],
            "_index": COURSES_INDEX,
            "_op_type": "create",
            "_type": "course",
            "absolute_url": {
                "en": "en/url",
                "fr": "fr/url"
            },
            "categories": ["1", "2", "3"],
            "cover_image": {
                "en": "en/image",
                "fr": "fr/image"
            },
            "is_meta": False,
            "logo": {
                "en": "/en/some/img.png",
                "fr": "/fr/some/img.png"
            },
            "nb_children": 0,
            "organizations": ["11", "12", "13"],
            **course,
        } for course in courses]
        bulk(actions=actions, chunk_size=500, client=ES_CLIENT)
        indices_client.refresh()

        results = self.client.get(
            f"/api/v1.0/courses/autocomplete/?{querystring:s}", **extra)
        self.assertEqual(results.status_code, 200)

        return json.loads(results.content)
Example #10
def create_index():
    es = Elasticsearch()
    client = IndicesClient(es)
    
    try:
        client.delete('physicians')
    except Exception as e:
        print(e)
Example #11
    def recreate_index(self, index_name, index_mapping):
        indices_client = IndicesClient(client=ES_CLIENT)
        if indices_client.exists(index_name):
            indices_client.delete(index=index_name)
        indices_client.create(index=index_name)
        indices_client.put_mapping(doc_type='page',
                                   index=index_name,
                                   body=index_mapping)
Example #12
    def delete_all(self):
        """Delete the index."""
        try:
            indices_client = IndicesClient(self._es)
            indices_client.delete(index=self._index)
        except Exception as e:
            _eprint("exception on delete_index {}".format(e))
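The try/except above can also be replaced by the client's ignore parameter, as example #29 further down does; a minimal sketch of the same method:

    def delete_all(self):
        """Delete the index, treating a missing index (404) as a no-op."""
        indices_client = IndicesClient(self._es)
        indices_client.delete(index=self._index, ignore=[400, 404])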
Example #13
def create_index_mi():
    indices_client = IndicesClient(models.client)
    index_name = models.PostMap._meta.es_index_name
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
    indices_client.put_mapping(body=models.PostMap._meta.es_mapping,
                               index=index_name)
Example #14
File: fmi_admin.py, Project: VinACE/FMI
def create_index_excel(excel_filename):
    indices_client = IndicesClient(models.client)
    index_name = 'excel'
    if len(excel_filename):
        doc_type = os.path.splitext(excel_filename)[0]
        index_name = 'excel_' + doc_type
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
Example #15
def remover_indice(nome_indice):
    """Remove o indice do Elasticsearch.

    O indice de elasticsearch é análogo a uma tabela em um SGBD.
    """
    es = conectar_em_elastic_search()
    client_indice = IndicesClient(es)
    if client_indice.exists(index=[nome_indice]):
        client_indice.delete(nome_indice)
Example #16
def create_index_pi():
    #   indices_client = IndicesClient(client=settings.ES_HOSTS)
    indices_client = IndicesClient(models.client)
    index_name = models.Review._meta.es_index_name
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
    indices_client.put_mapping(body=models.Review._meta.es_mapping,
                               index=index_name)
Example #17
def create_index_si_sites():
    indices_client = IndicesClient(models.client)
    index_name = models.PageMap._meta.es_index_name
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
    indices_client.put_mapping(doc_type=models.PageMap._meta.es_type_name,
                               body=models.PageMap._meta.es_mapping,
                               index=index_name)
Example #18
def create_index_bestmatch():
    indices_client = IndicesClient(models.client)
    index_name = models.bestmatchMap._meta.es_index_name
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
    #put_settings(models.bestmatchMap)
    indices_client.put_mapping(body=models.bestmatchMap._meta.es_mapping,
                               index=index_name)
Example #19
    def deleteIndex(self):
        self.es = Elasticsearch([{
            'host': elasticConfig['host'],
            'port': elasticConfig['port']
        }])
        esIndices = IndicesClient(self.es)
        index = elasticConfig['index']
        doc_type = elasticConfig['doc_type']
        esIndices.delete(index=index)
Example #20
    def recreate_index(self):
        indices_client = IndicesClient(client=settings.ES_CLIENT)
        index_name = Student._meta.es_index_name
        if indices_client.exists(index_name):
            indices_client.delete(index=index_name)
        indices_client.create(index=index_name)
        indices_client.put_mapping(doc_type=Student._meta.es_type_name,
                                   body=Student._meta.es_mapping,
                                   index=index_name)
Example #21
def create_index_survey():
    indices_client = IndicesClient(models.client)
    index_name = models.SurveyMap._meta.es_index_name
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
    #put_settings(models.ScentemotionMap)
    indices_client.put_mapping(doc_type=models.SurveyMap._meta.es_type_name,
                               body=models.SurveyMap._meta.es_mapping,
                               index=index_name)
Example #22
def create_index_dhk():
    indices_client = IndicesClient(models.client)
    index_name = 'recipes'
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
    indices_client.put_mapping(
        # ES 7.0 no longer supports mapping types, so doc_type is omitted (was: doc_type=index_name)
        body={'properties': wb_excel.recipes},
        index=index_name)
Example #23
def create_index_mi_feedly():
    indices_client = IndicesClient(models.client)
    index_name = models.FeedlyMap._meta.es_index_name
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
    #put_settings(models.FeedlyMap)
    indices_client.put_mapping(doc_type=models.FeedlyMap._meta.es_type_name,
                               body=models.FeedlyMap._meta.es_mapping,
                               index=index_name)
Example #24
    def initialize(self, idx):
        es_index, es_doctype = self.indexinfo(idx)
        self.logger.info("Initializing %s" % es_index)
        idx_client = IndicesClient(self.es)
        if idx_client.exists(es_index):
            idx_client.delete(es_index)
        idx_client.create(es_index)
        if idx == 'event':
            idx_client.put_mapping(doc_type=es_doctype, index=[es_index], body=event_mapping())
        self.logger.info("%s ready." % es_index)
Example #26
    def recreate_index(self):
        indices_client = IndicesClient(client=settings.ES_CLIENT)
        index_name = es_index_name
        if indices_client.exists(index_name):
            indices_client.delete(index=index_name)
        indices_client.create(index=index_name, body=es_ind_settings)
        for model_name in es_models:
            indices_client.put_mapping(
                doc_type=model_es_indices[model_name]['type'],
                body=es_mappings[model_name],
                index=es_index_name)
Example #27
    def recreate_index(self):
        indices_client = IndicesClient(client=settings.ES_CLIENT)
        index_name = self.es_index_name
        if indices_client.exists(index_name):
            indices_client.delete(index=index_name)
        indices_client.create(index=index_name, body=self.es_ind_settings)
        # create mapping for one model only for now
        model_name = 'place'
        indices_client.put_mapping(
            doc_type=model_es_indices[model_name]['type'],
            body=es_mappings[model_name],
            index=index_name)
Example #28
    def recreateIndex(self):
        """Recreate the index in Elasticsearch."""
        print("Deleting the previous index and creating the new one...")
        indices_client = IndicesClient(client=settings.ES_CLIENT)
        index_name = Product._meta.es_index_name
        type_type = Product._meta.es_type_name
        if indices_client.exists(index=index_name):
            indices_client.delete(index=index_name)
        indices_client.create(index_name)
        indices_client.put_mapping(doc_type=Product._meta.es_type_name,
                                   body=Product._meta.es_mapping,
                                   index=index_name)
Example #29
    def delete_index(self, es):
        """
        Delete the dataset index.

        :param es: Elasticsearch client instance
        :type es: elasticsearch.client.Elasticsearch
        :rtype: NewsgroupsDataset
        """
        ic = IndicesClient(es)
        ic.delete(index=self.es_index, ignore=[400, 404])

        return self
Example #30
File: rotind.py, Project: igoral5/synchro
def main():
    es_client = Elasticsearch([{'host': args.host, 'port': args.port}])
    es_index = IndicesClient(es_client)
    list_indexes = [index for index in es_index.status()['indices']]
    regexp = re.compile(r'(\d{4})\.(\d{2})\.(\d{2})', re.IGNORECASE | re.UNICODE)
    current_date = datetime.date.today()
    for index in list_indexes:
        res = regexp.search(index)
        if res:
            date_indx = datetime.date(year=int(res.group(1)), month=int(res.group(2)), day=int(res.group(3)))
            if (current_date - date_indx).days > args.old:
                es_index.delete(index)
Example #31
    def execute_query(self, kind, querystring=""):
        """
        Not a test.
        This method is doing the heavy lifting for the tests in this class: create and fill the
        index with our categories so we can run our queries and check the results.
        It also executes the query and returns the result from the API.
        """
        # Index these categories in Elasticsearch
        indices_client = IndicesClient(client=ES_CLIENT)
        # Delete any existing indices so we get a clean slate
        indices_client.delete(index="_all")
        # Create an index we'll use to test the ES features
        indices_client.create(index="test_categories")
        indices_client.close(index="test_categories")
        indices_client.put_settings(body=ANALYSIS_SETTINGS,
                                    index="test_categories")
        indices_client.open(index="test_categories")

        # Use the default categories mapping from the Indexer
        indices_client.put_mapping(body=CategoriesIndexer.mapping,
                                   doc_type="category",
                                   index="test_categories")

        # Actually insert our categories in the index
        actions = [{
            "_id": category["id"],
            "_index": "test_categories",
            "_op_type": "create",
            "_type": "category",
            "absolute_url": {
                "en": "en/url"
            },
            "description": {
                "en": "en/description"
            },
            "icon": {
                "en": "en/icon"
            },
            "is_meta": False,
            "logo": {
                "en": "en/logo"
            },
            "nb_children": 0,
            "path": category["id"],
            **category,
        } for category in CATEGORIES]
        bulk(actions=actions, chunk_size=500, client=ES_CLIENT)
        indices_client.refresh()

        response = self.client.get(f"/api/v1.0/{kind:s}/?{querystring:s}")
        self.assertEqual(response.status_code, 200)

        return json.loads(response.content)
Example #32
    def execute_query(self, querystring=""):
        """
        Not a test.
        This method is doing the heavy lifting for the tests in this class: create and fill the
        index with our organizations so we can run our queries and check the results.
        It also executes the query and returns the result from the API.
        """
        # Index these organizations in Elasticsearch
        indices_client = IndicesClient(client=ES_CLIENT)
        # Delete any existing indices so we get a clean slate
        indices_client.delete(index="_all")
        # Create an index we'll use to test the ES features
        indices_client.create(index="test_organizations")
        indices_client.close(index="test_organizations")
        indices_client.put_settings(body=ANALYSIS_SETTINGS,
                                    index="test_organizations")
        indices_client.open(index="test_organizations")

        # Use the default organizations mapping from the Indexer
        indices_client.put_mapping(
            body=OrganizationsIndexer.mapping,
            doc_type="organization",
            index="test_organizations",
        )

        # Actually insert our organizations in the index
        actions = [{
            "_id": organization["id"],
            "_index": "test_organizations",
            "_op_type": "create",
            "_type": "organization",
            "absolute_url": {
                "en": "en/url"
            },
            "description": {
                "en": "en/description"
            },
            "logo": {
                "en": "en/image"
            },
            **organization,
        } for organization in ORGANIZATIONS]
        bulk(actions=actions, chunk_size=500, client=ES_CLIENT)
        indices_client.refresh()

        response = self.client.get(f"/api/v1.0/organizations/?{querystring:s}")
        self.assertEqual(response.status_code, 200)

        return json.loads(response.content)
def init_search(app, clean=False):
    """ Create a client and attach it to the app """
    elastic_hosts = json.loads(app.config.get('ELASTIC_HOSTS'))
    app.elastic_client = Elasticsearch(elastic_hosts)
    app.elastic_index_name = app.config.get('ELASTIC_INDEX')

    index_client = IndicesClient(app.elastic_client)
    log.info("Checking for index {} at {}".format(app.elastic_index_name, elastic_hosts))

    if index_client.exists(app.elastic_index_name):
        if clean:
            index_client.delete(app.elastic_index_name)
            index_client.create(app.elastic_index_name)
    else:
        index_client.create(app.elastic_index_name)
Example #34
class IndexBase:

    def __init__(self, **kwargs):
        self.index = kwargs.pop('index')
        self.client = client_es
        self.client_index = IndicesClient(self.client)

        if kwargs.get('settings'):
            self.settings = kwargs.pop('settings')
        else:
            self.settings = DEFAULT_SETTINGS

        if self.exist_index():
            self.delete_index()
            self.create_index()
        else:
            self.create_index()

    def exist_index(self):
        return self.client_index.exists(index=self.index)

    def delete_index(self):
        return self.client_index.delete(index=self.index, ignore=[400, 404])

    def create_index(self):
        return self.client_index.create(index=self.index, body=self.settings)
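Since delete_index already passes ignore=[400, 404], the exists/delete/create branching in __init__ above is redundant; an equivalent, simpler constructor tail would be (sketch):

        # Equivalent to the branching above: the delete is already a no-op
        # when the index does not exist (ignore=[400, 404]).
        self.delete_index()
        self.create_index()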
Example #35
    def _reset_mapping(self, mapping_path):
        esi = IndicesClient(es.get_es_handle())
        index = settings.ES_INDEX

        if not esi.exists(index):
            raise CommandError("Non existing index : %s"%index)

        self.stdout.write(str(esi.delete(index=index)))
def import_ontology(ontology: lib.obo.Ontology, index_name: str):
    es = elasticsearch.Elasticsearch()

    ies = IndicesClient(es)

    actions = [dict(
        _index=index_name,
        _type=index_name,
        _source=dict(
            id=item.id,
            names=item.names()
        )
    ) for item in ontology.items()]

    if ies.exists(index_name):
        ies.delete(index_name)
    ies.create(index_name)
    return bulk(es, actions=actions)
Example #37
    def _remove_index_if_exists():
        es = elasticsearch.Elasticsearch()
        from elasticsearch.client import IndicesClient
        es_index = IndicesClient(es)
        if es_index.exists(STORAGE_INDEX_NAME):
            logger.info("Elasticsearch index '{0}' already exists and "
                        "will be deleted".format(STORAGE_INDEX_NAME))
            try:
                es_index.delete(STORAGE_INDEX_NAME)
                logger.info('Verifying Elasticsearch index was deleted...')
                deadline = time.time() + 45
                while es_index.exists(STORAGE_INDEX_NAME):
                    if time.time() > deadline:
                        raise RuntimeError(
                            'Elasticsearch index was not deleted after '
                            '45 seconds')
                    time.sleep(0.5)
            except BaseException as e:
                logger.warn('Ignoring caught exception on Elasticsearch delete'
                            ' index - {0}: {1}'.format(e.__class__, e.message))
Example #39
    def remove_log_indices():
        es = elasticsearch.Elasticsearch()
        from elasticsearch.client import IndicesClient
        es_index = IndicesClient(es)
        log_index_pattern = '{0}*'.format(LOG_INDICES_PREFIX)
        if es_index.exists(log_index_pattern):
            logger.info(
                "Elasticsearch indices '{0}' already exist and "
                "will be deleted".format(log_index_pattern))
            try:
                es_index.delete(log_index_pattern)
                logger.info('Verifying Elasticsearch index was deleted...')
                deadline = time.time() + 45
                while es_index.exists(log_index_pattern):
                    if time.time() > deadline:
                        raise RuntimeError(
                            'Elasticsearch index was not deleted after '
                            '45 seconds')
                    time.sleep(0.5)
            except BaseException as e:
                logger.warn('Ignoring caught exception on Elasticsearch delete'
                            ' index - {0}: {1}'.format(e.__class__, e.message))
Example #40
def setup(forced):
	properties = {}
	properties["fail_symptom"] = {"type" : "string", "index": "not_analyzed"}
	properties["ats_log"] = {"type" : "string"}
	properties["file_path"] = {"type" : "string", "analyzer": "path-analyzer"}
	add_unique_mapping(properties, "Test Start Time", {"VALUE" : {"type" : "date", "format": "yyyy/MM/dd HH:mm:ssZ||yyyy/MM/ddZ"}})
	add_unique_mapping(properties, "Test end Time", {"VALUE" : {"type" : "date", "format": "yyyy/MM/dd HH:mm:ssZ||yyyy/MM/ddZ"}})

	es = Elasticsearch([{'host': 'localhost', 'port': 9200}], max_retries=10, retry_on_timeout=True)
	idx_client = IndicesClient(es)
	if (idx_client.exists(index=PROJECT)):
		if (forced):
			idx_client.delete(index=PROJECT)
		else:
			print("Index already exists!")
			return

	runin_csv_status = {"runin_csv_status" : {"path_match": "RunInLog.*.STATUS", "mapping": {"index": "not_analyzed"}}}
	runin_csv_value = {"runin_csv_value" : {"path_match": "RunInLog.*.VALUE", "mapping": {"index": "not_analyzed", "fields" : {"double" : {"type" : "double"}}}}}
	runin_csv_u_limit = {"runin_csv_u_limit" : {"path_match": "RunInLog.*.U_LIMIT", "mapping": {"index": "not_analyzed", "fields" : {"double" : {"type" : "double"}}}}}
	runin_csv_l_limit = {"runin_csv_l_limit" : {"path_match": "RunInLog.*.L_LIMIT", "mapping": {"index": "not_analyzed", "fields" : {"double" : {"type" : "double"}}}}}
	runin_csv_test_time = {"runin_csv_test_time" : {"path_match": "RunInLog.*.TEST_TIME", "mapping": {"index": "not_analyzed", "fields" : {"double" : {"type" : "double"}}}}}
	csv_status = {"csv_status" : {"path_match": "*.STATUS", "mapping": {"index": "not_analyzed"}}}
	csv_value = {"csv_value" : {"path_match": "*.VALUE", "mapping": {"index": "not_analyzed", "fields" : {"double" : {"type" : "double"}}}}}
	csv_u_limit = {"csv_u_limit" : {"path_match": "*.U_LIMIT", "mapping": {"index": "not_analyzed", "fields" : {"double" : {"type" : "double"}}}}}
	csv_l_limit = {"csv_l_limit" : {"path_match": "*.L_LIMIT", "mapping": {"index": "not_analyzed", "fields" : {"double" : {"type" : "double"}}}}}
	csv_test_time = {"csv_test_time" : {"path_match": "*.TEST_TIME", "mapping": {"index": "not_analyzed", "fields" : {"double" : {"type" : "double"}}}}}
	dynamic_templates = [runin_csv_status, runin_csv_value, runin_csv_u_limit, runin_csv_l_limit, runin_csv_test_time, csv_status, csv_value, csv_u_limit, csv_l_limit, csv_test_time]

	analysis = {}
	analysis["analyzer"] = {}
	analysis["tokenizer"] = {}
	analysis["analyzer"]["path-analyzer"] = {"type": "custom", "tokenizer": "path-tokenizer"}
	analysis["tokenizer"]["path-tokenizer"] = {"type": "path_hierarchy"}

	mappings = {"dynamic_templates" : dynamic_templates, "properties" : properties}
	data = {"settings" : {"index.mapping.ignore_malformed": True, "number_of_replicas": 1, "analysis": analysis}, "mappings" : {STAGE: mappings}}
	print(json.dumps(data))
	idx_client.create(index=PROJECT, body=data)
Example #41
class RedisEsSetupMixin(object):

    def setUp(self):
        self.settings = TEST_SETTINGS_OBJECT
        self.es = get_es(self.settings)
        self.esi = IndicesClient(self.es)

        self.index = self.settings.get("ES_INDEX")

        # create the index first
        if self.esi.exists(self.index):
            self.esi.delete(index=self.index)

        self.esi.create(index=self.index)

        mapping_path = os.path.join(SCRAPY_ROOT,
                                 "resources/mappings.json")

        mapping_str = open(mapping_path, "r").read()
        mappings = json.loads(mapping_str)


        for k, v in mappings.items():
            res = self.esi.put_mapping(self.index, k, {k: mappings[k]})
            # print(res)


        self.redis_conn = get_redis(self.settings)


    def tearDown(self):
        if self.esi.exists(self.index):
            self.esi.delete(index=self.index)
            print "ES INDEX DELETED"

        #remove redis stuff
        self.redis_conn.flushdb()
        print "REDIS DB DELETED"
Example #42
def delete_index(name):
    es = get_es()
    ic = IndicesClient(es)
    resp = ic.delete(name)
    logger.debug('index delete: ' + str(resp))
Example #43
def schema_setup(es, project, forced, logger=None):
    properties = {}
    properties["fail_symptom"] = {"type": "string", "index": "not_analyzed"}
    properties["ats_log"] = {"type": "string"}
    properties["file_path"] = {"type": "string", "analyzer": "path-analyzer"}
    add_unique_mapping(
        properties, "Test Start Time", {"VALUE": {"type": "date", "format": "yyyy/MM/dd HH:mm:ssZ||yyyy/MM/ddZ"}}
    )
    add_unique_mapping(
        properties, "Test end Time", {"VALUE": {"type": "date", "format": "yyyy/MM/dd HH:mm:ssZ||yyyy/MM/ddZ"}}
    )

    idx_client = IndicesClient(es)
    if idx_client.exists(index=project):
        if forced:
            idx_client.delete(index=project)
        else:
            print "Index already exists!"
            return

    runin_csv_status = {"runin_csv_status": {"path_match": "RunInLog.*.STATUS", "mapping": {"index": "not_analyzed"}}}
    runin_csv_value = {
        "runin_csv_value": {
            "path_match": "RunInLog.*.VALUE",
            "mapping": {"index": "not_analyzed", "fields": {"double": {"type": "double"}}},
        }
    }
    runin_csv_u_limit = {
        "runin_csv_u_limit": {
            "path_match": "RunInLog.*.U_LIMIT",
            "mapping": {"index": "not_analyzed", "fields": {"double": {"type": "double"}}},
        }
    }
    runin_csv_l_limit = {
        "runin_csv_l_limit": {
            "path_match": "RunInLog.*.L_LIMIT",
            "mapping": {"index": "not_analyzed", "fields": {"double": {"type": "double"}}},
        }
    }
    runin_csv_test_time = {
        "runin_csv_test_time": {
            "path_match": "RunInLog.*.TEST_TIME",
            "mapping": {"index": "not_analyzed", "fields": {"double": {"type": "double"}}},
        }
    }
    csv_status = {"csv_status": {"path_match": "*.STATUS", "mapping": {"index": "not_analyzed"}}}
    csv_value = {
        "csv_value": {
            "path_match": "*.VALUE",
            "mapping": {"index": "not_analyzed", "fields": {"double": {"type": "double"}}},
        }
    }
    csv_u_limit = {
        "csv_u_limit": {
            "path_match": "*.U_LIMIT",
            "mapping": {"index": "not_analyzed", "fields": {"double": {"type": "double"}}},
        }
    }
    csv_l_limit = {
        "csv_l_limit": {
            "path_match": "*.L_LIMIT",
            "mapping": {"index": "not_analyzed", "fields": {"double": {"type": "double"}}},
        }
    }
    csv_test_time = {
        "csv_test_time": {
            "path_match": "*.TEST_TIME",
            "mapping": {"index": "not_analyzed", "fields": {"double": {"type": "double"}}},
        }
    }
    dynamic_templates = [
        runin_csv_status,
        runin_csv_value,
        runin_csv_u_limit,
        runin_csv_l_limit,
        runin_csv_test_time,
        csv_status,
        csv_value,
        csv_u_limit,
        csv_l_limit,
        csv_test_time,
    ]

    analysis = {}
    analysis["analyzer"] = {}
    analysis["tokenizer"] = {}
    analysis["analyzer"]["path-analyzer"] = {"type": "custom", "tokenizer": "path-tokenizer"}
    analysis["tokenizer"]["path-tokenizer"] = {"type": "path_hierarchy"}

    mappings = {"dynamic_templates": dynamic_templates, "properties": properties}
    data = {
        "settings": {"index.mapping.ignore_malformed": True, "number_of_replicas": 1, "analysis": analysis},
        "mappings": {ES_DOC_TYPE: mappings},
    }
    if logger is None:
        print(json.dumps(data))
    else:
        logger.info("Schema: %s" % json.dumps(data))
    idx_client.create(index=project, body=data)
Example #44
class ElasticSearchEngine(object):
    '''
    ElasticSearch Engine.
    '''

    def __init__(self, index, host=None, port=None):
        '''Only one host for now.'''
        assert(index.isalpha())
        self.init_state(index, host, port)

    def init_state(self, index, host, port):
        self._queue = []
        self.index = index
        self.host = host
        self.port = port
        if host is None:
            self.es = Elasticsearch()
        else:
            self.es = Elasticsearch(hosts=[{'host': host, 'port': port}])
        self.idx_manager = IndicesClient(self.es)
        self.mapper = ESMapper()

    # be persistence friendly
    def __getstate__(self):
        return (self.index, self.host, self.port)

    def __setstate__(self, state):
        self.init_state(*state)

    def _index(self, document, update=False):
        # for efficiency, nothing is executed yet,
        # we prepare and queue the operation
        doc = 'doc' if update else '_source'
        op = {
            '_index': self.index,
            '_type': document.__class__.__name__,
            '_op_type': 'update' if update else 'create',
            '_id': document._id,
            doc: {k: getattr(document, k)
                  for k in document.fields
                  if getattr(document, k) is not None}
        }
        self._queue.append(op)

    def add_document(self, document):
        '''
        Add a document to the data store, in index (a.k.a. collection),
        under the document type.
        '''
        self._index(document)

    def delete_document(self, doctype, docid):
        '''
        Remove document from index and storage.
        '''
        op = {
            '_op_type': 'delete',
            '_index': self.index,
            '_type': doctype.__name__,
            '_id': docid
        }
        self._queue.append(op)

    def update_document(self, document):
        '''Update document (partial update from delta document)'''
        self._index(document, True)

    def commit(self, sync=False):
        '''
        If ``sync``, index synchronously, else let Elasticsearch
        manage its index.
        '''
        helpers.bulk(self.es, self._queue)
        if sync:
            self.idx_manager.refresh(self.index)
        self._queue = []

    def cancel(self):
        '''
        Forget operation scheduled since last commit'''
        self._queue = []

    def search(self, query, size=20):
        '''
        Search the database.
        '''
        dsl = query(self.mapper)
        hits = self.es.search(index=self.index,
                              doc_type=query.queried_doc.__name__,
                              body={'query': dsl},
                              size=size)
        res = [
            (h['_score'], query.queried_doc.delta(h['_id'],
                                                  **h['_source']))
            for h in hits['hits']['hits']
        ]
        return res

    def delete_collection(self):
        if self.idx_manager.exists(self.index):
            self.idx_manager.delete(index=self.index)

    def create_collection(self, schema):
        '''
        Init the collections the first time.
        Just use once! Or you'll have to reindex all your documents.
        Schema is a list of Document classes.
        '''

        idx_manager = self.idx_manager
        if idx_manager.exists(self.index):
            idx_manager.delete(index=self.index)

        mappings = {}
        for doctype in schema:
            properties = {'_full': {"type": "string",
                                    "index_analyzer":  "autocomplete",
                                    "search_analyzer": "standard"}}
            excludes = []
            for name, ftype in doctype.fields.items():
                properties[name] = ESProperty(ftype)
                if not ftype.stored:
                    excludes.append(name)
            mappings[doctype.__name__] = {'properties': properties,
                                          '_source': {"excludes": excludes}}
        settings = {
            "number_of_shards": 1,
            "analysis": {
                "filter": {
                    "autocomplete_filter": {
                        "type":     "edge_ngram",
                        "min_gram": 1,
                        "max_gram": 20
                    }
                },
                "analyzer": {
                    "autocomplete": {
                        "type":      "custom",
                        "tokenizer": "standard",
                        "filter": [
                            "lowercase",
                            "autocomplete_filter"
                        ]
                    }
                }
            }
        }
        body = {"mappings": mappings, "settings": settings}
        idx_manager.create(index=self.index, body=body)
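Note that the _full property above uses index_analyzer, which only exists in Elasticsearch 1.x: the parameter was renamed to analyzer in ES 2.0, and the string type was later split into text/keyword in 5.x. A sketch of the modern equivalent of that property:

properties['_full'] = {
    "type": "text",                 # "string" before ES 5.x
    "analyzer": "autocomplete",     # was "index_analyzer" in ES 1.x
    "search_analyzer": "standard",
}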
Example #45
def remover_indice(nome_indice):
    es = conectar_em_elastic_search()
    client_indice = IndicesClient(es)
    if client_indice.exists(index=[nome_indice]):
        client_indice.delete(nome_indice)
class ESIndexManager(object):
    def __init__(self, es_config=None):
        if not es_config:
            es_config = SMConfig.get_conf()['elasticsearch']
        self._es = init_es_conn(es_config)
        self._ind_client = IndicesClient(self._es)

    def internal_index_name(self, alias):
        yin, yang = '{}-yin'.format(alias), '{}-yang'.format(alias)
        assert not (self.exists_index(yin) and self.exists_index(yang)), \
            'Only one of {} and {} should exist'.format(yin, yang)

        if self.exists_index(yin):
            return yin
        elif self.exists_index(yang):
            return yang
        else:
            return yin

    def create_index(self, index):
        dynamic_templates = [{
            "strings": {
                "match_mapping_type": "string",
                    "mapping": {
                        "type": "keyword",
                        "normalizer": "default"}}
        }]
        body = {
            "settings": {
                "index": {
                    "number_of_shards": 1,
                    "number_of_replicas": 0,
                    "max_result_window": 2147483647,
                    "analysis": {
                        "normalizer": {
                            "default": {
                                "type": "custom",
                                "filter": ["lowercase", "asciifolding"]
                            }
                        }
                    }}},
            "mappings": {
                "dataset": {
                    "dynamic_templates": dynamic_templates,
                    "properties": {
                        "ds_id": {"type": "keyword"}
                    }
                },
                "annotation": {
                    "dynamic_templates": dynamic_templates,
                    "properties": {
                        "ds_id": {"type": "keyword"},
                        "chaos": {"type": "float"},
                        "image_corr": {"type": "float"},
                        "pattern_match": {"type": "float"},
                        "total_iso_ints": {"type": "float"},
                        "min_iso_ints": {"type": "float"},
                        "max_iso_ints": {"type": "float"},
                        "msm": {"type": "float"},
                        "fdr": {"type": "float"}}}}}

        if not self._ind_client.exists(index):
            out = self._ind_client.create(index=index, body=body)
            logger.info('Index {} created\n{}'.format(index, out))
        else:
            logger.info('Index {} already exists'.format(index))

    def delete_index(self, index):
        if self._ind_client.exists(index):
            out = self._ind_client.delete(index)
            logger.info('Index {} deleted: {}'.format(index, out))

    def exists_index(self, index):
        return self._ind_client.exists(index)

    def another_index_name(self, index):
        assert index.endswith('yin') or index.endswith('yang')

        if index.endswith('yin'):
            return index.replace('yin', 'yang')
        else:
            return index.replace('yang', 'yin')

    def remap_alias(self, new_index, alias='sm'):
        old_index = self.another_index_name(new_index)
        logger.info('Remapping {} alias: {} -> {}'.format(alias, old_index, new_index))

        self._ind_client.update_aliases({
            "actions": [{"add": {"index": new_index, "alias": alias}}]
        })
        if self._ind_client.exists_alias(old_index, alias):
            self._ind_client.update_aliases({
                "actions": [{"remove": {"index": old_index, "alias": alias}}]
            })
            out = self._ind_client.delete(index=old_index)
            logger.info('Index {} deleted: {}'.format(old_index, out))
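A typical zero-downtime rebuild with this class, inferred from the methods above (a usage sketch, not code from the original project):

manager = ESIndexManager()
current = manager.internal_index_name('sm')      # e.g. 'sm-yin'
new_index = manager.another_index_name(current)  # the other of yin/yang
manager.create_index(new_index)
# ... bulk-index documents into new_index here ...
manager.remap_alias(new_index, alias='sm')       # swap alias, drop old index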
Example #47
def create_index_mappings(es_client, ea_index, recreate=False, old_ea_index=None):
    esversion = es_client.info()["version"]["number"]
    print("Elastic Version: " + esversion)

    es_index_mappings = read_es_index_mappings() if is_atleastsix(esversion) else read_es_index_mappings(5)

    es_index = IndicesClient(es_client)
    if not recreate:
        if es_index.exists(ea_index):
            print('Index ' + ea_index + ' already exists. Skipping index creation.')
            return None

    # (Re-)Create indices.
    if is_atleastsix(esversion):
        index_names = (
            ea_index,
            ea_index + '_status',
            ea_index + '_silence',
            ea_index + '_error',
            ea_index + '_past',
        )
    else:
        index_names = (
            ea_index,
        )
    for index_name in index_names:
        if es_index.exists(index_name):
            print('Deleting index ' + index_name + '.')
            try:
                es_index.delete(index_name)
            except NotFoundError:
                # Why does this ever occur?? It shouldn't. But it does.
                pass
        es_index.create(index_name)

    # To avoid a race condition. TODO: replace this with a real check
    time.sleep(2)

    if is_atleastseven(esversion):
        # TODO remove doc_type completely when the elasticsearch client allows doc_type=None
        # doc_type is a deprecated feature and will be completely removed in Elasticsearch 8
        es_client.indices.put_mapping(index=ea_index, doc_type='_doc',
                                      body=es_index_mappings['elastalert'], include_type_name=True)
        es_client.indices.put_mapping(index=ea_index + '_status', doc_type='_doc',
                                      body=es_index_mappings['elastalert_status'], include_type_name=True)
        es_client.indices.put_mapping(index=ea_index + '_silence', doc_type='_doc',
                                      body=es_index_mappings['silence'], include_type_name=True)
        es_client.indices.put_mapping(index=ea_index + '_error', doc_type='_doc',
                                      body=es_index_mappings['elastalert_error'], include_type_name=True)
        es_client.indices.put_mapping(index=ea_index + '_past', doc_type='_doc',
                                      body=es_index_mappings['past_elastalert'], include_type_name=True)
    elif is_atleastsixtwo(esversion):
        es_client.indices.put_mapping(index=ea_index, doc_type='_doc',
                                      body=es_index_mappings['elastalert'])
        es_client.indices.put_mapping(index=ea_index + '_status', doc_type='_doc',
                                      body=es_index_mappings['elastalert_status'])
        es_client.indices.put_mapping(index=ea_index + '_silence', doc_type='_doc',
                                      body=es_index_mappings['silence'])
        es_client.indices.put_mapping(index=ea_index + '_error', doc_type='_doc',
                                      body=es_index_mappings['elastalert_error'])
        es_client.indices.put_mapping(index=ea_index + '_past', doc_type='_doc',
                                      body=es_index_mappings['past_elastalert'])
    elif is_atleastsix(esversion):
        es_client.indices.put_mapping(index=ea_index, doc_type='elastalert',
                                      body=es_index_mappings['elastalert'])
        es_client.indices.put_mapping(index=ea_index + '_status', doc_type='elastalert_status',
                                      body=es_index_mappings['elastalert_status'])
        es_client.indices.put_mapping(index=ea_index + '_silence', doc_type='silence',
                                      body=es_index_mappings['silence'])
        es_client.indices.put_mapping(index=ea_index + '_error', doc_type='elastalert_error',
                                      body=es_index_mappings['elastalert_error'])
        es_client.indices.put_mapping(index=ea_index + '_past', doc_type='past_elastalert',
                                      body=es_index_mappings['past_elastalert'])
    else:
        es_client.indices.put_mapping(index=ea_index, doc_type='elastalert',
                                      body=es_index_mappings['elastalert'])
        es_client.indices.put_mapping(index=ea_index, doc_type='elastalert_status',
                                      body=es_index_mappings['elastalert_status'])
        es_client.indices.put_mapping(index=ea_index, doc_type='silence',
                                      body=es_index_mappings['silence'])
        es_client.indices.put_mapping(index=ea_index, doc_type='elastalert_error',
                                      body=es_index_mappings['elastalert_error'])
        es_client.indices.put_mapping(index=ea_index, doc_type='past_elastalert',
                                      body=es_index_mappings['past_elastalert'])

    print('New index %s created' % ea_index)
    if old_ea_index:
        print("Copying all data from old index '{0}' to new index '{1}'".format(old_ea_index, ea_index))
        # Use the defaults for chunk_size, scroll, scan_kwargs, and bulk_kwargs
        elasticsearch.helpers.reindex(es_client, old_ea_index, ea_index)

    print('Done!')
Example #48
def remover_indice(nome_indice):
    es = conectar_em_elastic_search()
    client_indice = IndicesClient(es)
    client_indice.delete(nome_indice)
    def tearDown(self):
        super(TestESTermIndexWeightingProvider, self).tearDown()

        ic = IndicesClient(self.es)
        ic.delete(self.index)
Example #50
            index='kaggle'
            ):

        count += 1

    print ("Done indexing")

if __name__ == '__main__':

    client = Elasticsearch()
    indices_client = IndicesClient(client)

    # Index the data only if passed in an argument to the script.
    if len(sys.argv) > 1:
        # Create index and mappings
        indices_client.delete(index='kaggle', ignore=404)
        create_index(indices_client)
        for cuisine in cuisines:
            create_cuisine_mapping(indices_client, cuisine)
        index_training_data(client)


    print("Analyzing: results will be posted in submissions.csv")
    with open('submission.csv', 'w') as sol:
        print('id,cuisine', file=sol)

        with open('test.json') as f:
            recipes = json.load(f)

            count = 0
            for recipe in recipes:
Example #51
    def handle(self, *args, **options):
        Student.objects.all().delete()
        University.objects.all().delete()
        Course.objects.all().delete()
        start = time.time()

        # database part
        # make some Universities
        university_names = (
            'MIT', 'MGU', 'CalTech', 'KPI', 'DPI', 'PSTU'
        )
        universities = []
        for name in university_names:
            uni = mommy.make(University, name=name)
            universities.append(uni)
        # make some courses
        template_options = ['CS%s0%s', 'MATH%s0%s', 'CHEM%s0%s', 'PHYS%s0%s']
        courses = []
        for num in range(1, 4):
            for course_num in range(1, 4):
                for template in template_options:
                    name = template % (course_num, num)
                    course = mommy.make(Course, name=name)
                    courses.append(course)

        students = []
        for _ in range(options.get('count')[0]):
            stud = mommy.prepare(
                Student,
                university=random.choice(universities),
                first_name=names.get_first_name(),
                last_name=names.get_last_name(),
                age=random.randint(17, 25)
            )
            students.append(stud)
        Student.objects.bulk_create(students)

        ThroughModel = Student.courses.through
        stud_courses = []
        for student_id in Student.objects.values_list('pk', flat=True):
            courses_already_linked = []
            for _ in range(random.randint(1, 10)):
                index = random.randint(0, len(courses) - 1)
                if index not in courses_already_linked:
                    courses_already_linked.append(index)
                else:
                    continue
                stud_courses.append(
                    ThroughModel(
                        student_id=student_id,
                        course_id=courses[index].pk
                    )
                )
        ThroughModel.objects.bulk_create(stud_courses)

        # recreate index
        indices_client = IndicesClient(client=settings.ES_CLIENT)
        if indices_client.exists('django'):
            indices_client.delete(index='django')
        indices_client.create(index='django')
        indices_client.put_mapping(
            doc_type='student',
            body=Student._meta.es_mapping,
            index='django'
        )
        # update part
        put_all_to_index(Student)

        finish = time.time() - start
        print('%s items %s seconds' % (options.get('count')[0], finish))
Example #52
class ElasticSearchEngine(object):
    '''
    ElasticSearch Engine.
    '''

    # make it compatible with services
    LOAD_PRIORITY = 30

    def __init__(self, index, host=None, port=None):
        '''Only one host for now.'''
        if not es_installed:
            raise ValueError('elasticsearch not installed')

        assert(index.isalpha())
        self.init_state(index, host, port)

    def init_state(self, index, host, port):
        self._queue = []
        self.index = index
        self.host = host
        self.port = port
        if host is None:
            self.es = Elasticsearch()
        else:
            self.es = Elasticsearch(hosts=[{'host': host, 'port': port}])
        self.idx_manager = IndicesClient(self.es)
        self.mapper = ESQueryMapper()

    # be persistence friendly
    def __getstate__(self):
        return (self.index, self.host, self.port)

    def __setstate__(self, state):
        self.init_state(*state)

    def _index(self, document, update=False):
        # for efficiency, nothing is executed yet,
        # we prepare and queue the operation
        cursor = IndexCursor(self.index)
        document.save(cursor, update)
        cursor.enqueue(self._queue)

    def add_document(self, document):
        '''
        Add a document to the data store, in the index (a.k.a. collection),
        under the document type.
        '''
        self._index(document)

    def delete_document(self, schema, docid):
        '''
        Remove document from index and storage.
        '''
        op = {
            '_op_type': 'delete',
            '_index': self.index,
            '_type': schema.type_name,
            '_id': docid
        }
        self._queue.append(op)

    def update_document(self, document):
        '''Update document (partial update from delta document)'''
        self._index(document, True)

    def commit(self, sync=False):
        '''
        If ``sync``, index synchronously, else let Elasticsearch
        manage its index.
        '''
        helpers.bulk(self.es, self._queue)
        if sync:
            self.idx_manager.refresh(self.index)
        self._queue = []

    def cancel(self):
        '''
        Forget operations scheduled since the last commit.
        '''
        self._queue = []

    def search(self, query, size=20):
        '''
        Search the database.
        '''
        index_cursor = IndexCursor(self.index, self.es.search)
        return query.search(index_cursor, self.mapper, size)

    def delete_collection(self):
        if self.idx_manager.exists(self.index):
            self.idx_manager.delete(index=self.index)

    def create_collection(self, schemas):
        '''
        Init the collections the first time.
        Just use once! Or you'll have to reindex all your documents.
        `schemas` is a list of Document classes or Schema instances.
        '''

        idx_manager = self.idx_manager
        if idx_manager.exists(self.index):
            idx_manager.delete(index=self.index)

        mapper = ESSchemaMapper(idx_manager)
        for schema in schemas:
            schema.map(mapper)

        mapper.create(self.index)
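
# A minimal usage sketch (hypothetical document class and names, not from the
# original source):
#
#   engine = ElasticSearchEngine('library', host='localhost', port=9200)
#   engine.create_collection([Book])   # one-time index and mapping setup
#   engine.add_document(book)
#   engine.commit(sync=True)           # flush the queued bulk operations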
Example #53
def main(argv):
    index = 'user_topics'
    client = Elasticsearch('localhost:9200')
    index_client = IndicesClient(client)
    
    if index_client.exists(index):
        index_client.delete(index)
    
    index_client.create(index=index, body={
        'settings': {
            'number_of_shards':   4,
            'number_of_replicas': 0
        },
        'mappings': {
            'user': {
                'properties': {
                    #'id': {
                    #    'type': 'long',
                    #    'doc_values': True
                    #},
                    'topics': {
                        'type': 'integer',
                        'doc_values': True
                    },
                    'n_topics': {
                        'type': 'integer',
                        'doc_values': True
                    }
                }
            }
        }
    })
    
    n_users           = int(argv[1])
    n_topics          = int(argv[2])
    n_topics_per_user = int(argv[3])
    
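    # NOTE: `rand` (and `ceil`) come from the original module's imports; `rand`
    # is assumed to behave like np.random.randint and return random integers,
    # since its results are collected into a set of topic ids below.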
    docs_per_chunk = int(2e4)
    n_chunks       = int(ceil(n_users / docs_per_chunk))
    
    start_time = time.time()
    
    for i_chunk in range(1, n_chunks+1):
        docs = []
        
        for i in range(docs_per_chunk):
            n_user_topics = rand(n_topics_per_user)[0]
            topics = list(set(rand(n_topics, n_user_topics)))
            
            doc_id = str(random.getrandbits(63))

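            # The bulk API body is NDJSON: one action line followed by one
            # source line per document.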
            docs.append('{"index":{"_index": "%s", "_type": "user", "_id": "%s"}})' % (index, doc_id))
            docs.append(json.dumps({
                #'id':      doc_id,
                'topics':   topics,
                'n_topics': len(topics)
            }))
        
        #print(json.dumps(json.loads(docs[1]), indent=4)); return
        
        try:
            response = client.bulk(body='\n'.join(docs))
        except Exception:
            # Even when an exception is thrown typically documents were stored in ES
            sleep_seconds = 10
            print('\rHTTP timed out, sleeping %d seconds...' % sleep_seconds)
            time.sleep(sleep_seconds)
        
        print('\rChunk %5d/%d, %5.2f%%' % (i_chunk, n_chunks, i_chunk*100.0/n_chunks), end='')
    
    index_time = time.time()
    print('\nCalling optimize, indexing took %.1f s...' % (index_time - start_time))
    sys.stdout.flush()
    
    index_client.optimize(index=index, max_num_segments=3, request_timeout=1e6)
    print('Optimization done in %.1f s' % (time.time() - index_time))
Example #54
from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient
from elasticsearch_dsl import Mapping, String, Search

es = Elasticsearch()
ies = IndicesClient(es)

ies.delete('test', ignore=404)  # tolerate a missing index on first run
ies.create('test')
ies.close('test')

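# Analysis settings are static index settings, so the index must be closed
# before they can be updated.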
ies.put_settings(index='test', body={
    "analysis":{
      "analyzer":{
        "default":{
          "type":"custom",
          "tokenizer":"standard",
          "filter":[ "standard", "lowercase", "stop", "kstem" ]
        }
      }
    }
})

m = Mapping('test')
m.field('f', String())
m.save(index='test', using=es)

ies.open(index='test')
Example #55
class TestESTermAggregationWeightProvider(TestCase):

    def setUp(self):
        super(TestESTermAggregationWeightProvider, self).setUp()

        self.es = Elasticsearch(hosts=['localhost:%d' % es_runner.es_state.port])
        self.ic = IndicesClient(self.es)
        self.index = 'es_term_weight_provider_test'
        self.doc_type = 'test-doc'
        self.field = 'text'

        if self.ic.exists(self.index):
            self.ic.delete(self.index)

        self.ic.create(self.index)
        self.es.create(self.index, self.doc_type, {self.field: 'foo'})
        self.es.create(self.index, self.doc_type, {self.field: 'knark'})
        self.es.create(self.index, self.doc_type, {self.field: 'ba'})
        self.es.create(self.index, self.doc_type, {self.field: 'knirk'})
        self.es.create(self.index, self.doc_type, {self.field: 'ba'})
        self.es.create(self.index, self.doc_type, {self.field: 'ba'})
        self.es.create(self.index, self.doc_type, {self.field: 'knark'})
        self.es.create(self.index, self.doc_type, {self.field: 'ba'}, refresh=True)

    def tearDown(self):
        super(TestESTermAggregationWeightProvider, self).tearDown()

        self.ic.delete(self.index)


    def test_getitem_single(self):
        provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field,
                                        inverse=False, sublinear=False)

        term, w = provider['ba']
        self.assertEqual('ba', term)
        self.assertAlmostEqual(.5, w)
        term, w = provider['knark']
        self.assertEqual('knark', term)
        self.assertAlmostEqual(.25, w)
        term, w = provider['knirk']
        self.assertEqual('knirk', term)
        self.assertAlmostEqual(.125, w)
        term, w = provider['foo']
        self.assertEqual('foo', term)
        self.assertAlmostEqual(.125, w)

    def test_inverse(self):
        provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field,
                                        inverse=True, sublinear=False)
        term, w = provider['ba']
        self.assertEqual('ba', term)
        self.assertAlmostEqual(2., w)
        term, w = provider['knark']
        self.assertEqual('knark', term)
        self.assertAlmostEqual(4., w)
        term, w = provider['knirk']
        self.assertEqual('knirk', term)
        self.assertAlmostEqual(8., w)
        term, w = provider['foo']
        self.assertEqual('foo', term)
        self.assertAlmostEqual(8., w)

    def test_sublinear(self):
        provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field,
                                        inverse=False, sublinear=True)
        term, w = provider['ba']
        self.assertEqual('ba', term)
        self.assertAlmostEqual(-0.693147, w, places=4)
        term, w = provider['knark']
        self.assertEqual('knark', term)
        self.assertAlmostEqual(-1.386294, w, places=4)
        term, w = provider['knirk']
        self.assertEqual('knirk', term)
        self.assertAlmostEqual(-2.079442, w, places=4)
        term, w = provider['foo']
        self.assertEqual('foo', term)
        self.assertAlmostEqual(-2.079442, w, places=4)

    def test_inverse_sublinear(self):
        provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field,
                                        inverse=True, sublinear=True)
        term, w = provider['ba']
        self.assertEqual('ba', term)
        self.assertAlmostEqual(0.693147, w, places=4)
        term, w = provider['knark']
        self.assertEqual('knark', term)
        self.assertAlmostEqual(1.386294, w, places=4)
        term, w = provider['knirk']
        self.assertEqual('knirk', term)
        self.assertAlmostEqual(2.079442, w, places=4)
        term, w = provider['foo']
        self.assertEqual('foo', term)
        self.assertAlmostEqual(2.079442, w, places=4)

    def test_getitem_multiple(self):
        provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field,
                                        inverse=False, sublinear=False)

        weights = dict(provider[['ba', 'foo', 'knark', 'knirk']])
        self.assertEqual(['ba', 'foo', 'knark', 'knirk'], sorted(weights.keys()))
        self.assertAlmostEqual(weights['ba'], .5)
        self.assertAlmostEqual(weights['knark'], .25)
        self.assertAlmostEqual(weights['knirk'], .125)
        self.assertAlmostEqual(weights['foo'], .125)

        weights = dict(provider['ba', 'foo', 'knark', 'knirk'])
        self.assertEqual(['ba', 'foo', 'knark', 'knirk'], sorted(weights.keys()))
        self.assertAlmostEqual(weights['ba'], .5)
        self.assertAlmostEqual(weights['knark'], .25)
        self.assertAlmostEqual(weights['knirk'], .125)
        self.assertAlmostEqual(weights['foo'], .125)

    def test_getitem_missing(self):
        provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field,
                                        inverse=False, sublinear=False)

        self.assertRaises(KeyError, lambda: provider['notfound'])
        self.assertRaises(KeyError, lambda: provider['ba', 'notfound'])

        provider = ESTermAggregationWeightProvider(self.es, self.index, self.doc_type, self.field,
                                        inverse=False, sublinear=False, missing='ignore')

        self.assertIsNone(provider['notfound'])
        self.assertEqual([('ba', .5)], list(provider['ba', 'notfound']))
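
# From the assertions above: the provider weights a term by its document
# frequency over the total document count (df/N); inverse=True returns N/df,
# and sublinear=True applies a natural log (e.g. ln(4/8) ≈ -0.6931 for 'ba').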
Example #56
def main(index_num):
    n_out      = int(10e6)
    n_batch    = int(4e3)
    n_batches  = n_out // n_batch
    index      = 'image_hashes_%02d' % index_num
    
    client = Elasticsearch('localhost:9200')
    index_client = IndicesClient(client)
    
    if index_client.exists(index):
        # Deleting an existing index is intentionally disabled; bail out
        # instead of clobbering it.
        print('Not deleting %s!' % index)
        return
    
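    # NOTE: n_samples, b_p_sample, dim_in, dim_out, the projection matrix
    # `proj` and the get_sampler() helper are module-level globals in the
    # original file and are not shown in this snippet.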
    es_short = {
        'type': 'short',
    }
    
    field_name = lambda i: '%x' % i
    fields = {field_name(i): es_short for i in range(n_samples)}
    fields['raw'] = {
        'type': 'string',
        'store': True,
        'index': 'not_analyzed',
        'doc_values': True
    }
    
    index_client.create(index=index, body={
        'settings': {
            'number_of_shards':   4,
            'number_of_replicas': 0
        },
        'mappings': {
            'images': {
                '_source': {'enabled': False},
                'properties': fields
            }
        }
    })
    
    sampler, pow2 = get_sampler(n_samples, b_p_sample)
    start_time = time.time()
    
    for i_batch in range(1, n_batches+1):
        data = np.random.randn(n_batch, dim_in)
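        # Random-projection LSH: the sign of data.dot(proj) gives one bit per
        # output dimension; the dot with powers of two packs the bits into a
        # 64-bit integer hash.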
        hash = (data.dot(proj) > 0).astype(np.uint64)
        hash_int = hash.dot(2**np.arange(dim_out).astype(np.uint64))
        #print('\n'.join(repr(i.astype(np.uint8)) for i in hash)); return
        
        sampled = np.vstack([
            hash.dot(sampler[:, :, j]).dot(pow2)
            for j in range(n_samples)
        ]).astype(np.int16).T.tolist()
        
        #print(repr(sampled)); print(repr([len(sampled), len(sampled[0])])); return
        
        docs = []
        
        for i in range(n_batch):
            doc = {
                field_name(j): sampled[i][j] for j in range(n_samples)
            }
            doc['raw'] = '{0:064b}'.format(hash_int[i])
            doc_id = random.getrandbits(63)
            
            docs.append('{"index":{"_index": "%s", "_type": "images", "_id": "%d"}})' % (index, doc_id))
            docs.append(json.dumps(doc))
        
        #print(json.dumps(json.loads(docs[1]), indent=4)); return
        
        try:
            response = client.bulk(body='\n'.join(docs))
        except Exception:
            # Even when an exception is thrown typically documents were stored in ES
            sleep_seconds = 10
            print('\rHTTP timed out, sleeping %d seconds...' % sleep_seconds)
            time.sleep(sleep_seconds)

        print('\rChunk %5d/%d, %5.2f%%' % (i_batch, n_batches, i_batch*100.0/n_batches), end='')
    
    index_time = time.time()
    print('\nCalling optimize, indexing took %.1f s...' % (index_time - start_time))
    sys.stdout.flush()
    
    index_client.optimize(index=index, max_num_segments=3, request_timeout=1e6)
    print('Optimization done in %.1f s' % (time.time() - index_time))
Example #57
class ESExporter:
    def __init__(self, sm_config):
        self.es = Elasticsearch(hosts=[{"host": sm_config['elasticsearch']['host']}])
        self.ind_client = IndicesClient(self.es)

    def _index(self, annotations):
        to_index = []
        for r in annotations:
            d = dict(zip(COLUMNS, r))
            d['comp_names'] = u'|'.join(d['comp_names']).replace(u'"', u'')
            d['comp_ids'] = u'|'.join(d['comp_ids'])
            d['mz'] = '{:010.4f}'.format(d['mz']) if d['mz'] else ''

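            # The document id encodes dataset, database, sum formula and
            # adduct, so re-indexing the same annotation replaces the old doc.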
            to_index.append({
                '_index': 'sm',
                '_type': 'annotation',
                '_id': '{}_{}_{}_{}'.format(d['ds_name'], d['db_name'], d['sf'], d['adduct']),
                '_source': d
            })

        bulk(self.es, actions=to_index, timeout='60s')

    def _delete(self, annotations):
        to_delete = []
        for r in annotations:
            d = dict(zip(COLUMNS, r))
            to_delete.append({
                '_op_type': 'delete',
                '_index': 'sm',
                '_type': 'annotation',
                '_id': '{}_{}_{}_{}'.format(d['ds_name'], d['db_name'], d['sf'], d['adduct']),
            })
        try:
            bulk(self.es, to_delete)
        except BulkIndexError as e:
            logger.warning('{} - {}'.format(e.args[0], e.args[1][1]))

    def index_ds(self, db, ds_name, db_name):
        annotations = db.select(RESULTS_TABLE_SQL, ds_name, db_name)

        logger.info('Deleting documents from the index: {}-{}'.format(ds_name, db_name))
        self._delete(annotations)

        logger.info('Indexing documents: {}-{}'.format(ds_name, db_name))
        self._index(annotations)

    def create_index(self, name='sm'):
        body = {
            'settings': {
                "index": {
                    'max_result_window': 2147483647,
                    "analysis": {
                        "analyzer": {
                            "analyzer_keyword": {
                                "tokenizer": "keyword",
                                "filter": "lowercase"
                            }
                        }
                    }
                }
            },
            'mappings': {
                "annotation": {
                    "properties": {
                        "db_name": {"type": "string", "index": "not_analyzed"},
                        "ds_name": {"type": "string", "index": "not_analyzed"},
                        "sf": {"type": "string", "index": "not_analyzed"},
                        "comp_names": {
                            "type": "string",
                            "analyzer": "analyzer_keyword",
                        },
                        "comp_ids": {"type": "string", "index": "not_analyzed"},
                        "chaos": {"type": "float", "index": "not_analyzed"},
                        "image_corr": {"type": "float", "index": "not_analyzed"},
                        "pattern_match": {"type": "float", "index": "not_analyzed"},
                        "msm": {"type": "float", "index": "not_analyzed"},
                        "adduct": {"type": "string", "index": "not_analyzed"},
                        "fdr": {"type": "float", "index": "not_analyzed"},
                        "mz": {"type": "string", "index": "not_analyzed"}
                    }
                }
            }
        }
        if not self.ind_client.exists(name):
            out = self.ind_client.create(index=name, body=body)
            logger.info('Index {} created\n{}'.format(name, out))
        else:
            logger.info('Index {} already exists'.format(name))

    def delete_index(self, name='sm'):
        out = self.ind_client.delete(name)
        logger.info('Index {} deleted\n{}'.format(name, out))
Example #58
File: ese.py  Project: merlin83/ese
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--src-host", action="store", default="127.0.0.1", type=unicode, help="Source host [default: %(default)s]")
    parser.add_argument("--src-port", action="store", default=9200, help="Source port [default: %(default)s]")
    parser.add_argument("--src-index", action="store", default="", type=unicode, help="Source index")
    parser.add_argument("--src-batch-size", action="store", type=int, default=5000, help="Source query batchsize [default: %(default)s]")
    parser.add_argument("--src-scroll-interval", action="store", type=unicode, default="60m", help="Interval for source scroll query [default: %(default)s]")

    parser.add_argument("--dest-host", action="store", default="127.0.0.1", type=unicode, help="Destination host [default: %(default)s]")
    parser.add_argument("--dest-port", action="store", default=9200, help="Destination port [default: %(default)s]")
    parser.add_argument("--dest-index", action="store", default="", type=unicode, help="Destination index")
    parser.add_argument("--dest-batch-size", action="store", type=int, default=5000, help="Destination batchsize [default: %(default)s]")
    parser.add_argument("--dest-alias", action="store", help="Destination index alias (to be set after we have finished populating)")
    parser.add_argument("--dest-concurrency", action="store", type=int, default=4, help="Destination batchsize [default: %(default)s]")
    parser.add_argument("--dest-delete-index", action="store_true", help="Delete destination index at before starting")

    parser.add_argument("--query", action="store", type=unicode, default="", help="Query to use [if None is specified, a match_all will be used]")

    args = parser.parse_args()

    if args.src_index is None or len(args.src_index) == 0:
        raise Exception("--src-index must be specified!")

    if args.dest_index is None or len(args.dest_index) == 0:
        raise Exception("--dest-index must be specified!")

    dt_start = datetime.now()
    # copy mapping
    src_es_instance = get_elasticsearch(args.src_host, args.src_port)
    dest_es_instance = get_elasticsearch(args.dest_host, args.dest_port)
    # check if src_index exists
    src_es_ic = IndicesClient(src_es_instance)
    if not src_es_ic.exists(args.src_index):
        raise Exception("--src-index %s does not exist!" % args.src_index)
    # check if dest_index exists
    dest_es_ic = IndicesClient(dest_es_instance)
    if dest_es_ic.exists(args.dest_index):
        if args.dest_delete_index:
            dest_es_ic.delete(index=args.dest_index)
        else:
            raise Exception("--dest-index %s already exists! Use --dest-delete-index if you want to drop it" % args.dest_index)
    log.info("Copying mapping...")
    # copy mapping over to dest
    src_index_information = src_es_ic.get(index=args.src_index)
    dest_es_ic.create(index=args.dest_index, body=src_index_information.get(args.src_index, {}))
    # set num_of_replicas to 0
    dest_es_ic.put_settings(index=args.dest_index, body={"settings": {"index": {"number_of_replicas": 0}}})
    # perform multiprocessing
    log.info("Copying data...")
    MAGIC_STRING = "%s:%s" % (str(uuid4()), str(uuid4()))
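    # MAGIC_STRING presumably serves as a queue sentinel: the source worker
    # pushes it when the scroll is exhausted so each dest worker knows to stop.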
    DEST_QUEUE = Queue()
    DEST_COUNTER = Value('i', 0)
    src_process = Process(target=src_worker, args=(args, DEST_QUEUE, MAGIC_STRING))
    src_process.start()
    dest_processes = [Process(target=dest_worker, args=(args, DEST_QUEUE, MAGIC_STRING, DEST_COUNTER)) for i in xrange(args.dest_concurrency)]
    for i in dest_processes: i.start()
    src_process.join()
    for i in dest_processes: i.join()
    log.info("[dest_worker] Total processed %s" % DEST_COUNTER.value)
    if args.dest_alias is not None and len(args.dest_alias) > 0:
        # we remove all existing mappings to this alias, then add it to the current dest_index
        for idx_name, aliases_mapping in dest_es_ic.get_aliases().iteritems():
            if args.dest_alias in aliases_mapping.get("aliases", {}):
                dest_es_ic.delete_alias(index=idx_name, name=args.dest_alias)
        dest_es_ic.put_alias(index=args.dest_index, name=args.dest_alias)
    dest_es_ic.refresh(args.dest_index)
    dt_end = datetime.now()
    log.info("Time elapsed: %s" % (dt_end-dt_start, ))