Example #1
def main():
    # Define the globals
    global index_names
    global STARTED_TIMESTAMP
    global es
    global es_indices
    try:
        # Initiate the Elasticsearch session using the ES low-level client.
        # By default, nodes are randomized before being added to the pool and round-robin is used for load balancing.
        es = Elasticsearch(ES_HOSTS, timeout=30)
        es_indices = IndicesClient(es)

    except Exception:
        print("Could not connect to elasticsearch!")
        sys.exit(1)

    print("Creating indices.. \n"),
    indices = generate_indices()
    print("Done!\n")

    print("GET Settings \n"),
    print json.dumps(es_indices.get_settings(index="_all"),
                     sort_keys=True,
                     indent=4,
                     separators=(',', ': '))
    print("Done!\n")

    # Clean up the created indices by default (CLEANUP defaults to True).
    if CLEANUP:
        print("Cleaning up created indices...")
        cleanup_indices()
        print("Done!\n")
Example #2
 def setUp(self):
     """
     Instantiate our ES client and make sure all indexes are deleted before each test
     """
     super().setUp()
     self.indices_client = IndicesClient(client=ES_CLIENT)
     self.indices_client.delete(index="_all")
Example #3
def create_wikipedia_index(ic: IndicesClient) -> None:
    """
    Add an index to Elasticsearch called 'wikipedia'

    Parameters
    ----------
    ic : IndicesClient
        The client to control Elasticsearch index settings

    Returns
    -------
    None
    """
    request_body = {
        "settings": {
            "analysis": {
                "analyzer": {
                    "my_analyzer": {
                        "type": "custom",
                        "tokenizer": "standard",
                        "filter": ["lowercase", "my_stops"]
                    }
                },
                "filter": {
                    "my_stops": {
                        "type": "stop",
                        "stopwords_path": "stopwords.txt"
                    }
                }
            }
        }
    }

    ic.create(index="wikipedia", body=request_body)
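A minimal usage sketch for the function above, assuming a local cluster and that stopwords.txt exists in the node's config directory (as stopwords_path requires); it verifies the custom analyzer through the analyze API:

from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient

ic = IndicesClient(Elasticsearch())
create_wikipedia_index(ic)
resp = ic.analyze(index="wikipedia",
                  body={"analyzer": "my_analyzer", "text": "The Quick Fox"})
print([t["token"] for t in resp["tokens"]])  # lowercased; stopwords from stopwords.txt removed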
Example #4
 def setUp(self):
     """
     Make sure all indexes are deleted before each new test is run.
     """
     super().setUp()
     self.indices_client = IndicesClient(client=settings.ES_CLIENT)
     self.indices_client.delete(index="_all")
Example #5
 def __init__(self, driver, connection_name, connection_config):
     super(ElasticsearchConnection, self).__init__(driver, connection_name,
                                                   connection_config)
     self.uri = self.connection_config.get('uri').split(',')
     self.cnx_opts = {}
     use_ssl = self.connection_config.get('use_ssl', True)
     if isinstance(use_ssl, str):
         if use_ssl.lower() == 'false':
             use_ssl = False
         else:
             use_ssl = True
     self.cnx_opts['use_ssl'] = use_ssl
     if use_ssl:
         verify_certs = self.connection_config.get('verify_certs', True)
         if isinstance(verify_certs, str):
             if verify_certs.lower() == 'false':
                 verify_certs = False
             else:
                 verify_certs = True
         self.cnx_opts['verify_certs'] = verify_certs
         self.cnx_opts['ca_certs'] = self.connection_config.get(
             'ca_certs', None)
         self.cnx_opts['client_cert'] = self.connection_config.get(
             'client_cert', None)
         self.cnx_opts['client_key'] = self.connection_config.get(
             'client_key', None)
     self.es = Elasticsearch(self.uri, **self.cnx_opts)
     try:
         self.log.debug("Elasticsearch info: %s" % self.es.info())
     except Exception as e:
         self.log.warn("An error occured on estabilishing "
                       "connection to Elasticsearch: %s" % e)
     self.ic = IndicesClient(self.es)
Example #6
class TestSingleDocSigTerms(TestCase):
    def setUp(self):
        super(TestSingleDocSigTerms, self).setUp()

        self.es = Elasticsearch(hosts=['localhost:%d' % es_runner.es_state.port])
        self.ic = IndicesClient(self.es)
        self.index = 'single_doc_sigterms_test'
        self.doc_type = 'test-doc'
        self.field = 'text'

        if self.ic.exists(self.index):
            self.ic.delete(self.index)

        self.ic.create(self.index)
        self.es.create(self.index, self.doc_type, {self.field: 'foo ba knark foo knirk knark foo'}, id='doc_1')

    def test_tf_for_doc_id(self):
        sigterms = SingleDocSigTerms(self.es, self.index, self.doc_type, self.field, None)

        resp = dict(sigterms.tf_for_doc_id('doc_1'))
        self.assertEqual(4, len(resp))
        self.assertEqual(3, resp['foo'])
        self.assertEqual(2, resp['knark'])
        self.assertEqual(1, resp['ba'])
        self.assertEqual(1, resp['knirk'])
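For reference, the raw term frequencies asserted above can also be fetched straight from Elasticsearch with the termvectors API; a sketch under the same setUp:

tv = self.es.termvectors(index=self.index, doc_type=self.doc_type,
                         id='doc_1', fields=[self.field])
terms = tv['term_vectors']['text']['terms']  # e.g. terms['foo']['term_freq'] == 3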
Example #7
 def test_tokenize(self):
     test_case = {
         "text": "2018-06-08T00:00:00Z INFO GET /v1/bundles/7ef8966b-45ef-4e0a-a51b-44a865372050.2018-06-08T230333.785338Z?param1=1&param2=2 {\"key\": \"value\"}"
     }
     index_name = self.es_client._format_today_index_name(self.index_prefix)
     index_client = IndicesClient(TestESClient.es)
     with self.new_index(index_name):
         response = index_client.analyze(index=index_name, body=test_case)
         tokens = [t['token'] for t in response['tokens']]
     self.assertEqual(set(tokens), {
         '7ef8966b-45ef-4e0a-a51b-44a865372050',
         '2018-06-08T230333.785338Z',
         ':',
         'INFO',
         '1',
         '2',
         'v1',
         'bundles',
         'key',
         'GET',
         'param2',
         'param1',
         '2018-06-08T00:00:00Z',
         'value'
     })
     self.assertEqual(len(tokens), 14)
Example #8
 def setUp(self):
     """
     Make sure tests get clean indexes to run
     """
     self.indices_client = IndicesClient(client=settings.ES_CLIENT)
     # Delete any existing indexes so we get a clean slate
     self.indices_client.delete(index="_all")
     # Create an index we'll use to test the ES features
     self.indices_client.create(index="test_index")
     # Add a mapping for a test document type. It needs to include different fields for the
     # various features we'll be running tests on
     self.indices_client.put_mapping(
         body={
             "properties": {
                 "datetime_field": {
                     "type": "date"
                 },
                 "keyword_field": {
                     "type": "keyword"
                 },
                 "text_field": {
                     "type": "text"
                 },
             }
         },
         doc_type="test_doc",
         index="test_index",
     )
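The doc_type argument marks this as a 6.x-era client; on Elasticsearch 7+, where mapping types are gone, the equivalent call simply drops it. A sketch, assuming a 7.x client:

# Same mapping, typeless (7.x) form:
self.indices_client.put_mapping(
    body={
        "properties": {
            "datetime_field": {"type": "date"},
            "keyword_field": {"type": "keyword"},
            "text_field": {"type": "text"},
        }
    },
    index="test_index",
)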
Example #9
def import_examples_into_es(examples: list):
    index_name = config.index_name
    type_name = config.type_name
    buck_size = config.buck_size

    es = Elasticsearch(config.es_url)
    es_index = IndicesClient(es)
    if es_index.exists(index=index_name):
        es_index.delete(index=index_name)
    # Create the index
    with open(config.es_index_json) as f:
        mappings = json.load(f)

    res = es.indices.create(index=index_name, body=mappings)

    # Bulk-import the data into ES
    for i in range(len(examples)):
        examples[i] = {
            "_index": index_name,
            "_type": type_name,
            "_id": examples[i]["ntc_id"],
            "_source": examples[i]
        }

    for i in tqdm(range(ceil(len(examples) / buck_size)), desc="Import into ES"):
        bulk(es, actions=examples[i * buck_size: min((i + 1) * buck_size, len(examples))])
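The snippet above leans on several imports and a config module that sit outside the excerpt; a plausible preamble (the config attributes are assumptions inferred from usage) would be:

import json
from math import ceil

from elasticsearch import Elasticsearch
from elasticsearch.client import IndicesClient
from elasticsearch.helpers import bulk  # batched indexing helper
from tqdm import tqdm  # progress bar

# `config` is assumed to provide: index_name, type_name, buck_size (batch
# size), es_url, and es_index_json (path to a JSON file with the mappings).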
Example #10
    def __init__(self, conf, queue):
        self.conf = conf
        host = self.conf.get("host", "es")
        port = self.conf.get("port", 9200)
        self.log = logging.getLogger("pulsar.indexer")
        logging.getLogger("elasticsearch").setLevel(logging.INFO)
        self.log.debug("port: %r" % port)
        self.es = Elasticsearch([{"host": host, "port": port}])
        self.cluster_client = ClusterClient(self.es)
        health = self.cluster_client.health()
        if not health or health.get("number_of_nodes") < 1:
            raise Exception("No Elasticsearch nodes found: %r" % health)
        # Put our template
        self.indices_client = IndicesClient(self.es)
        self.index_prefix = self.conf.get("index_prefix", self.INDEX_PREFIX)
        self.indices_client.put_template(
            name=self.index_prefix, body=open("conf/es-template.json").read())
        self.log.info("Put template to ES for pulsar indexes")
        self.last_event_time = time()
        self.index_prefix = self.index_prefix + "-"
        self.index_name = self.get_index_name()
        self.queue = queue
        self.counter = 0
        self.stats_checkpoint = time()
        self.stats_every = 10000

        try:
            # This will block as it reads from the queue
            self.bulk(self.es, self.iterator(), stats_only=True)
        except Exception as e:
            self.log.exception("Error with bulk", exc_info=e)
Example #11
class IndexBase:

    def __init__(self, **kwargs):
        self.index = kwargs.pop('index')
        self.client = client_es
        self.client_index = IndicesClient(self.client)

        if kwargs.get('settings'):
            self.settings = kwargs.pop('settings')
        else:
            self.settings = DEFAULT_SETTINGS

        if self.exist_index():
            self.delete_index()
        self.create_index()

    def exist_index(self):
        return self.client_index.exists(index=self.index)

    def delete_index(self):
        return self.client_index.delete(index=self.index, ignore=[400, 404])

    def create_index(self):
        return self.client_index.create(index=self.index, body=self.settings)
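A usage sketch for IndexBase, assuming client_es and DEFAULT_SETTINGS are defined at module level; constructing it drops and recreates the index with the given settings:

index = IndexBase(index='articles', settings={
    "settings": {"number_of_shards": 1},
    "mappings": {"properties": {"title": {"type": "text"}}},
})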
Example #12
    def setUp(self):
        """ Starts a new connector for every test
        """
        try:
            os.unlink("config.txt")
        except OSError:
            pass
        open("config.txt", "w").close()
        self.connector = Connector(
            address='%s:%s' % (mongo_host, self.primary_p),
            oplog_checkpoint='config.txt',
            target_url=elastic_pair,
            ns_set=['test.test'],
            u_key='_id',
            auth_key=None,
            doc_manager='mongo_connector/doc_managers/elastic_doc_manager.py',
            auto_commit_interval=0
        )
        # Clean out test databases
        try:
            self.elastic_doc._remove()
        except OperationFailed:
            try:
                # Create test.test index if necessary
                client = Elasticsearch(hosts=[elastic_pair])
                idx_client = IndicesClient(client)
                idx_client.create(index='test.test')
            except es_exceptions.TransportError:
                pass

        self.conn.test.test.drop()
        self.connector.start()
        assert_soon(lambda: len(self.connector.shard_set) > 0)
        assert_soon(lambda: sum(1 for _ in self.elastic_doc._search()) == 0)
Example #13
    def _create_index(self):
        es_index = IndicesClient(self._es)
        if es_index.exists(self._store_index):
            logging.info('Index ' + self._store_index +
                         ' already exists. Skipping index creation.')
            return None

        es_mapping = {
            "mappings": {
                'last_runtime': {
                    'properties': {
                        'plugin_name': {
                            'index': 'not_analyzed',
                            'type': 'string'
                        },
                        'rule_name': {
                            'index': 'not_analyzed',
                            'type': 'string'
                        },
                        'plugin_sid': {
                            'index': 'not_analyzed',
                            'type': 'long'
                        },
                        '@timestamp': {
                            'format': 'dateOptionalTime||epoch_millis',
                            'type': 'date'
                        }
                    }
                }
            }
        }

        self._es.indices.create(self._store_index, body=es_mapping)

        time.sleep(1)
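The 'string' type with 'index': 'not_analyzed' is pre-5.x mapping syntax; on Elasticsearch 5+ the equivalent exact-value fields use the keyword type. A rough 7.x (typeless) version of the mapping above:

es_mapping = {
    "mappings": {
        "properties": {
            "plugin_name": {"type": "keyword"},
            "rule_name": {"type": "keyword"},
            "plugin_sid": {"type": "long"},
            "@timestamp": {"type": "date",
                           "format": "dateOptionalTime||epoch_millis"},
        }
    }
}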
Example #14
def get_es_indices():
    es_idx = IndicesClient(es)
    indices = es_idx.get('appcompat-*')
    result = []
    for index_name, v in indices.items():
        result.append((index_name, index_name, v['settings']['index']['creation_date']))
    return result
Example #15
	def handle(self, *args, **options):

		es = Elasticsearch(hosts=[{'host': 'localhost', 'port': 9200}])

		fop=open('spider/management/commands/'+str(argv[2]), 'r')
		inds = IndicesClient(es)

		mapping={ "mappings": { "product_type":  {  "properties": { "code": { "type" : "string" },"name": {"type" : "string"},"img": {"type" : "string"},"url": {"type" : "string"},"price_reg": {"type" : "float"},"price_discount": {"type" : "float"}}}}}

		if not inds.exists(index='gearbest_index'):
			inds.create(index='gearbest_index', body=mapping)
			print('gearbest_index created')

		for jsonline in fop:
			jobj=loads(jsonline)
			del jobj["_type"]
			es.index(index="gearbest_index",doc_type='product_type', body=jobj, id=jobj['code'])
			
			disc=0
			reg=0

			if len(jobj['price_discount'])>0:
				disc  = float(jobj['price_discount'][0])
			if len(jobj['price_reg'])>0:
				reg  = float(jobj['price_reg'][0])

			#insert="INSERT into 'price_gb' ('price','price_disc','code','date') values ("+str(reg)+", "+str(disc)+", '"+str(jobj['code'])+"', '"+str(datetime.today())+"')"
			#cursor = connection.cursor()
			#cursor.execute(insert)

			add_price=Price_gb(price=reg,price_disc=disc,code=str(jobj['code']),date=datetime.date.today())
			add_price.save()

			print('code=' + str(jobj['code']))
Example #18
    def __init__(self, host, port, db_config):
        self.es = Elasticsearch([{"host": host, "port": port}])

        if not self.es.ping():
            error(
                "Cannot connect to elasticsearch cluster for users. Check database configuration in user_db_config.json."
            )
            exit(0)

        index = db_config["index"]
        self.index = index["name"]

        user_type = db_config["user_type"]
        self.user_type_name = user_type["name"]

        mappings = dict()
        if "mapping" in user_type:
            mappings[self.user_type_name] = user_type["mapping"]
        body = dict()

        if "settings" in index:
            body["settings"] = index["settings"]
        if mappings:
            body["mappings"] = mappings

        try:
            self.indices_client = IndicesClient(self.es)
            if not self.indices_client.exists(self.index):
                self.indices_client.create(index=self.index, body=body)
        except TransportError:
            error(
                "Error while creating elasticsearch cluster for users. Check type mappings in user_db_config.json."
            )
            print(traceback.format_exc())
            exit(0)
Example #20
    def test_index_manager_regenerate_indices_from_broken_state(self, *args):
        """
        `regenerate_indices` should succeed and give us a working Elasticsearch
        when it runs and finds a broken state (e.g. with an existing, incorrect
        index with the name of an alias).

        This can occur when ES restarts and an update signal is triggered before
        Richie had a chance to bootstrap ES.
        """
        # The indices client will be used to test the actual indices in Elasticsearch
        indices_client = IndicesClient(client=ES_CLIENT)

        # Create a course and trigger a signal to index it. This will create a
        # broken "richie_test_courses" index
        course = CourseFactory(should_publish=True)
        update_course(course.extended_object, "en")
        self.assertIsNotNone(indices_client.get("richie_test_courses"))

        # Call our `regenerate_indices` command
        creation_datetime = datetime(2010, 1, 1, tzinfo=timezone.utc)
        creation_string = creation_datetime.strftime("%Y-%m-%d-%Hh%Mm%S.%fs")
        with mock.patch.object(timezone, "now", return_value=creation_datetime):
            regenerate_indices(None)

        # No error was thrown, the courses index (like all others) was bootstrapped
        self.assertIsNotNone(
            indices_client.get(f"richie_test_courses_{creation_string}")
        )
        # The expected alias is associated with the index
        self.assertEqual(
            list(indices_client.get_alias("richie_test_courses").keys())[0],
            f"richie_test_courses_{creation_string}",
        )
Example #21
    def __init__(self, host, port, db_config):
        self.es = Elasticsearch([{"host": host, "port": port}])
        try:
            if self.es.ping():
                es_logger = logging.getLogger('elasticsearch')
                es_logger.setLevel(logging.CRITICAL)

                self.indices_client = IndicesClient(self.es)

                index_definitions = db_config["index_definitions"]
                self.settings = db_config["settings"]

                self.data_point_definition = index_definitions["data_point"]
                self.create_index_from_definition(self.data_point_definition,
                                                  self.settings)
                self.data_point_type_name = self.data_point_definition["name"]
                self.data_point_index = self.data_point_definition[
                    "index_name"]
                self.definitions.append(self.data_point_definition)

                self.experiment_definition = index_definitions["experiment"]
                self.create_index_from_definition(self.experiment_definition,
                                                  self.settings)
                self.experiment_type_name = self.experiment_definition["name"]
                self.experiment_index = self.experiment_definition[
                    "index_name"]
                self.definitions.append(self.experiment_definition)

                self.target_system_definition = index_definitions[
                    "target_system"]
                self.create_index_from_definition(
                    self.target_system_definition, self.settings)
                self.target_system_type_name = self.target_system_definition[
                    "name"]
                self.target_system_index = self.target_system_definition[
                    "index_name"]
                self.definitions.append(self.target_system_definition)

                self.analysis_definition = index_definitions["analysis"]
                self.create_index_from_definition(self.analysis_definition,
                                                  self.settings)
                self.analysis_type_name = self.analysis_definition["name"]
                self.analysis_index = self.analysis_definition["index_name"]
                self.definitions.append(self.analysis_definition)

                self.stage_definition = index_definitions["stage"]
                self.create_index_from_definition(self.stage_definition,
                                                  self.settings)
                self.stage_type_name = self.stage_definition["name"]
                self.stage_index = self.stage_definition["index_name"]
                self.definitions.append(self.stage_definition)

            else:
                raise ConnectionError("Host/port values are not valid")
        except TransportError as err1:
            error(
                "TransportError while creating elasticsearch instance for experiments. Check type mappings in experiment_db_config.json."
            )
            raise err1
Example #22
def create_index(name):
    es = get_es()
    ic = IndicesClient(es)
    body = {}
    # body.update(settings.INDEX_SETTINGS)
    body.update(settings.INDEX_MAPPINGS)
    resp = ic.create(name, json.dumps(body))
    logger.debug('index create: ' + str(resp))
Example #23
    def __mapFile(self, json_map_file):
        es = Elasticsearch([{'host': self.elasticsearch_host, 'port': self.elasticsearch_port}])
#        es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
        ic = IndicesClient(es)
        with open(json_map_file) as json_data:
            d = json.load(json_data)
            doc_type = list(d.keys())[0]
            ic.put_mapping(index='wow', doc_type=doc_type, body=d)
Example #24
 def delete_all(self):
     """delete index"""
     try:
         indices_client = IndicesClient(self._es)
         indices_client.delete(index=self._index)
     except Exception as e:
         _eprint("exception on delete_index {}".format(e))
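The same effect can be had without the try/except by using the client's ignore option, which suppresses the listed HTTP status codes:

IndicesClient(self._es).delete(index=self._index, ignore=[400, 404])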
Example #25
def create_index():
    es = Elasticsearch()
    client = IndicesClient(es)
    
    try:
        client.delete('physicians')
    except Exception as e:
        print(e)
Example #26
def create_index(es, index_name):
    es_indices = IndicesClient(es)
    # es_indices.create(index=index_name)
    with open('data/FifaRecords.mappings.json') as json_data:
        d = json.load(json_data)

    es_indices.create(index=index_name, body=d)
    print("Created ES index {}".format(index_name))
Example #27
 def _create_main_index_if_not_exists(self):
     """
     method that creates new elastic index if not existed
     :return:
     """
     ic = IndicesClient(self.es)
     if not ic.exists(MAIN_INDEX_NAME):
         ic.create(MAIN_INDEX_NAME)
Example #28
def create_index_excel(excel_filename):
    indices_client = IndicesClient(models.client)
    index_name = 'excel'
    if len(excel_filename):
        doc_type = os.path.splitext(excel_filename)[0]
        index_name = 'excel_' + doc_type
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
Example #29
    def _reset_mapping(self, mapping_path):
        esi = IndicesClient(es.get_es_handle())
        index = settings.ES_INDEX

        if not esi.exists(index):
            raise CommandError("Non-existing index: %s" % index)

        self.stdout.write(str(esi.delete(index=index)))
Example #30
def cmd_print_mapping(ctx, index):
    context = get_conn()
    doctype = context.get_doctype()
    conn = context.connection()
    indices_client = IndicesClient(conn)
    resp = indices_client.get_mapping(index=index)
    mapping = resp[index]["mappings"][doctype]["properties"]
    pprint(mapping)
Example #31
    def __init__(self, index, doc_type, hosts=None):
        if hosts is None:
            hosts = ['localhost']

        self.es = Elasticsearch(hosts=hosts, verify_certs=False, timeout=60)
        self.ic = IndicesClient(self.es)
        self.index = index
        self.doc_type = doc_type
Example #32
def remover_indice(nome_indice):
    """Remove the index from Elasticsearch.

    An Elasticsearch index is analogous to a table in an RDBMS.
    """
    es = conectar_em_elastic_search()
    client_indice = IndicesClient(es)
    if client_indice.exists(index=[nome_indice]):
        client_indice.delete(nome_indice)
Example #33
    def create_index_if_not_exists(self):
        """ Check if index exists & if not exists create index & types & store their mappings.  """

        ic = IndicesClient(self.es)
        response = ic.exists(index=[self.index_name])
        if not response:
            es_mappings = ElasticSearchController.get_index_mapper_dict()
            index_response = ic.create(index=self.index_name,
                                       body={"mappings": es_mappings})
Example #35
 def __init__(self):
     print(os.path.join(self.SETTINGS_DIR, 'corpus.json'))
     f = open(os.path.join(self.SETTINGS_DIR, 'corpus.json'),
              'r', encoding='utf-8')
     self.settings = json.loads(f.read())
     f.close()
     self.name = self.settings['corpus_name']
     self.languages = self.settings['languages']
     if len(self.languages) <= 0:
         self.languages = [self.name]
     self.input_format = self.settings['input_format']
     self.corpus_dir = os.path.join('../corpus', self.name)
     self.iterSent = None
     if self.input_format in ['json', 'json-gzip']:
         self.iterSent = JSONDocReader(format=self.input_format)
     self.goodWordFields = ['lex', 'wf', 'wf_display',
                            'parts', 'gloss', 'gloss_index', 'n_ana',
                            'trans_en', 'trans_ru']
     self.AdditionalWordFields = set()
     if 'word_fields' in self.settings:
         self.AdditionalWordFields |= set(self.settings['word_fields'])
     if 'word_table_fields' in self.settings:
         self.AdditionalWordFields |= set(self.settings['word_table_fields'])
     if 'accidental_word_fields' in self.settings:
         self.AdditionalWordFields -= set(self.settings['accidental_word_fields'])
     f = open(os.path.join(self.SETTINGS_DIR, 'categories.json'),
              'r', encoding='utf-8')
     categories = json.loads(f.read())
     self.goodWordFields += ['gr.' + v for lang in categories
                             for v in categories[lang].values()]
     self.goodWordFields = set(self.goodWordFields)
     f.close()
     self.pd = PrepareData()
     self.es = Elasticsearch()
     self.es_ic = IndicesClient(self.es)
     self.shuffled_ids = [i for i in range(1, 1000000)]
     random.shuffle(self.shuffled_ids)
     self.shuffled_ids.insert(0, 0)    # id=0 is special and should not change
     self.tmpWordIDs = [{} for i in range(len(self.languages))]    # word as JSON -> its integer ID
     self.tmpLemmaIDs = [{} for i in range(len(self.languages))]   # lemma as string -> its integer ID
     self.word2lemma = [{} for i in range(len(self.languages))]    # word's ID -> ID of its lemma (or -1, if none)
     self.wordFreqs = [{} for i in range(len(self.languages))]     # word's ID -> its frequency
     self.wordSFreqs = [{} for i in range(len(self.languages))]    # word's ID -> its number of sentences
     self.wordDocFreqs = [{} for i in range(len(self.languages))]  # (word's ID, dID) -> word frequency in the document
     # self.wordSIDs = [{} for i in range(len(self.languages))]      # word's ID -> set of sentence IDs
     self.wordDIDs = [{} for i in range(len(self.languages))]      # word's ID -> set of document IDs
     self.wfs = set()         # set of word forms (for sorting)
     self.lemmata = set()     # set of lemmata (for sorting)
     self.sID = 0          # current sentence ID for each language
     self.dID = 0          # current document ID
     self.wID = 0          # current word ID
     self.wordFreqID = 0
     self.numWords = 0     # number of words in current document
     self.numSents = 0     # number of sentences in current document
     self.numWordsLang = [0] * len(self.languages)    # number of words in each language in current document
     self.numSentsLang = [0] * len(self.languages)    # number of sentences in each language in current document
     self.totalNumWords = 0
Example #36
 def deleteIndex(self):
     self.es = Elasticsearch([{
         'host': elasticConfig['host'],
         'port': elasticConfig['port']
     }])
     esIndices = IndicesClient(self.es)
     index = elasticConfig['index']
     doc_type = elasticConfig['doc_type']
     esIndices.delete(index=index)
Example #37
class EsSchema():
    def __init__(self, client):
        self.client = client
        self.indicesClient = IndicesClient(self.client)
        self.schema = self.load_schema()

    def load_schema(self):
        with codecs.open('data/esdata.json', mode="r",
                         encoding='UTF-8') as file:
            return json.load(file)

    def make_index_template(self):
        if not self.indicesClient.exists_template('soaktest:template'):
            self.indicesClient.put_template("soaktest:template",
                                            self.schema['soaktest:template'])
        if not self.indicesClient.exists_template('soakdownload:template'):
            self.indicesClient.put_template(
                "soakdownload:template", self.schema['soakdownload:template'])

    def make_kibana_index(self):
        if not self.indicesClient.exists_template(
                "kibana_index_template:.kibana"):
            self.indicesClient.put_template(
                "kibana_index_template:.kibana",
                self.schema["kibana_index_template:.kibana"])

    def make_kibana_visualization(self):
        for item in self.schema["kibana:data"]:
            self.client.index(index=".kibana",
                              doc_type=item["_type"],
                              id=item["_id"],
                              body=item["_source"])

    def make_schema(self):
        self.make_index_template()
Example #38
def create_index_survey():
    indices_client = IndicesClient(models.client)
    index_name = models.SurveyMap._meta.es_index_name
    if indices_client.exists(index_name):
        indices_client.delete(index=index_name)
    indices_client.create(index=index_name)
    #put_settings(models.ScentemotionMap)
    # add qstfld fields
    es_mapping = models.SurveyMap._meta.es_mapping
    for qst, mapping in survey.qst2fld.items():
        fields = mapping[0]
        field_type = mapping[1]
        if field_type == 'nested_qst_ans':
            for field in fields:
                if field not in es_mapping['properties']:
                    es_mapping['properties'][field] = {}
                    es_mapping['properties'][field]['type'] = 'nested'
                    es_mapping['properties'][field]['properties'] = {}
                    es_mapping['properties'][field]['properties']['question'] = {'type' : 'text', 'fields' : {'keyword' : {'type' : 'keyword', 'ignore_above' : 256}}}
                    es_mapping['properties'][field]['properties']['answer'] = {'type' : 'text', 'fields' : {'keyword' : {'type' : 'keyword', 'ignore_above' : 256}}}
                        #'type'       : 'nested',
                        #'properties' : {
                        #    'question' : {'type' : 'text', 'fields' : {'keyword' : {'type' : 'keyword', 'ignore_above' : 256}}},
                        #    'answer'   : {'type' : 'text', 'fields' : {'keyword' : {'type' : 'keyword', 'ignore_above' : 256}}},
                        #    }
                        #},
    indices_client.put_mapping(
        doc_type=models.SurveyMap._meta.es_type_name,
        #body=models.SurveyMap._meta.es_mapping,
        body=es_mapping,
        index=index_name
        )
Example #39
 def __createIndex(self):
     es = Elasticsearch([{'host': self.elasticsearch_host, 'port': self.elasticsearch_port}])
     ic = IndicesClient(es)
     if ic.exists(index='wow'):
         print("deleting old index")
         self.deleteIndex()
     ic.create(index='wow')
     # blah = glob.glob(os.path.join(self.map_directory, '*'))
     for currentFile in glob.glob(os.path.join(self.map_directory, '*')):
         print("MAP FILE: " + currentFile)
         self.__mapFile(currentFile)
Example #40
 def status(self):
     idx_client = IndicesClient(self.es)
     for idx in ['raw-article', 'enhanced-article']:
         es_index = self.indexinfo(idx)[0]
         if idx_client.exists(es_index):
             self.logger.info("%s contains %s documents." % (idx, self.es.count(index=es_index)['count']))
             if idx == 'raw-article':
                 query = {"query": {"term": {"status": 1}}}
                 self.logger.info(
                     "%s articles have been processed." % self.es.count(index=es_index, body=query)['count'])
         else:
             self.logger.info("%s does not exist" % es_index)
Example #41
def main():
    es_client = Elasticsearch([{'host': args.host, 'port': args.port}])
    es_index = IndicesClient(es_client)
    list_indexes = [index for index in es_index.status()['indices']]
    regexp = re.compile(r'(\d{4})\.(\d{2})\.(\d{2})', re.IGNORECASE | re.UNICODE)
    current_date = datetime.date.today()
    for index in list_indexes:
        res = regexp.search(index)
        if res:
            date_indx = datetime.date(year=int(res.group(1)), month=int(res.group(2)), day=int(res.group(3)))
            if (current_date - date_indx).days > args.old:
                es_index.delete(index)
Example #42
    def delete_index(self, es):
        """
        Delete the dataset index.

        :param es: Elasticsearch client instance
        :type es: elasticsearch.client.Elasticsearch
        :rtype : NewsgroupsDataset
        """
        ic = IndicesClient(es)
        ic.delete(index=self.es_index, ignore=[400, 404])

        return self
Example #43
 def setUp(self):
     """Empty ElasticSearch at the start of every test
     """
     try:
         self.elastic_doc._remove()
     except OperationFailed:
         try:
             # Create test.test index if necessary
             client = Elasticsearch(hosts=['localhost:9200'])
             idx_client = IndicesClient(client)
             idx_client.create(index='test.test')
         except es_exceptions.TransportError:
             pass
Example #44
    def initialize(self, conf, context):
        host = conf.get('zeit.recommend.elasticsearch.host', 'localhost')
        port = conf.get('zeit.recommend.elasticsearch.port', 9200)
        self.es = Elasticsearch(hosts=[{'host': host, 'port': port}])
        self.match = re.compile('seite-[0-9]|komplettansicht').match
        self.index = '%s-%s' % date.today().isocalendar()[:2]
        ic = IndicesClient(self.es)

        try:
            if not ic.exists(self.index):
                ic.create(self.index)
        except ConnectionError as e:
            log('[UserIndexBolt] ConnectionError, index unreachable: %s' % e)
            return
Example #45
    def _create_weight_index(es, index):
        """
        Creates the index with the right mapping if it doesn't exist.

        :param es:
        :type es:elasticsearch.Elasticsearch
        :param index:
        :type index:str|unicode
        """
        ic = IndicesClient(es)

        if ic.exists(index):
            logging.info('Index %s already exists ...' % index)
        else:
            ic.create(index=index, body=ES_TERMWEIGHTING_INDEX_SETTINGS)
Example #46
    def setUp(self):
        self.settings = TEST_SETTINGS_OBJECT
        self.es = get_es(self.settings)
        self.esi = IndicesClient(self.es)

        self.index = self.settings.get("ES_INDEX")

        #create the index firstly
        if self.esi.exists(self.index):
            self.esi.delete(index=self.index)

        self.esi.create(index=self.index)

        mapping_path = os.path.join(SCRAPY_ROOT,
                                 "resources/mappings.json")

        mapping_str = open(mapping_path, "r").read()
        mappings = json.loads(mapping_str)


        for k, v in mappings.items():
            res = self.esi.put_mapping(self.index, k, {k: mappings[k]})
            #print res


        self.redis_conn = get_redis(self.settings)
Example #47
 def initialize(self, idx):
     es_index, es_doctype = self.indexinfo(idx)
     self.logger.info("Initializing %s" % es_index)
     idx_client = IndicesClient(self.es)
     if idx_client.exists(es_index):
         idx_client.delete(es_index)
     idx_client.create(es_index)
     if idx == 'event':
         idx_client.put_mapping(doc_type=es_doctype, index=[es_index], body=event_mapping())
     self.logger.info("%s ready." % es_index)
Example #48
 def recreate_index(self):
     indices_client = IndicesClient(client=settings.ES_CLIENT)
     index_name = Student._meta.es_index_name
     if indices_client.exists(index_name):
         indices_client.delete(index=index_name)
     indices_client.create(index=index_name)
     indices_client.put_mapping(
         doc_type=Student._meta.es_type_name,
         body=Student._meta.es_mapping,
         index=index_name
     )
Example #49
 def init_state(self, index, host, port):
     self._queue = []
     self.index = index
     self.host = host
     self.port = port
     if host is None:
         self.es = Elasticsearch()
     else:
         self.es = Elasticsearch(hosts=[{'host': host, 'port': port}])
     self.idx_manager = IndicesClient(self.es)
     self.mapper = ESMapper()
Example #50
    def _init_mapping(self, mapping_path):
        esi = IndicesClient(es.get_es_handle())
        index = settings.ES_INDEX

        #first create index if not exists
        if not esi.exists(index):
            self.stdout.write("Creating index for db : %s"%index)
            esi.create(index=index)
            self.stdout.write("Index Created for : %s"%index)


        if not mapping_path or not os.path.exists(mapping_path):
            raise CommandError("mapping path does not exist")

        mapping_str = open(mapping_path, "r").read()
        mappings = json.loads(mapping_str)


        for k, v in mappings.items():
            res = esi.put_mapping(index, k, {k: mappings[k]})
            self.stdout.write(str(res))
Example #51
 def _remove_index_if_exists():
     es = elasticsearch.Elasticsearch()
     from elasticsearch.client import IndicesClient
     es_index = IndicesClient(es)
     if es_index.exists(STORAGE_INDEX_NAME):
         logger.info(
             "Elasticsearch index '{0}' already exists and "
             "will be deleted".format(STORAGE_INDEX_NAME))
         try:
             es_index.delete(STORAGE_INDEX_NAME)
             logger.info('Verifying Elasticsearch index was deleted...')
             deadline = time.time() + 45
             while es_index.exists(STORAGE_INDEX_NAME):
                 if time.time() > deadline:
                      raise RuntimeError(
                          'Elasticsearch index was not deleted after '
                          '45 seconds')
                 time.sleep(0.5)
          except BaseException as e:
              logger.warning('Ignoring caught exception on Elasticsearch delete'
                             ' index - {0}: {1}'.format(e.__class__, e))
Example #52
def create_stations_mapping():
    idx_client = IndicesClient(es)

    mapping = {
        "properties": {
            "name": {
                "type": "text"
            },
            "link": {
                "type": "text"
            },
            "elevation": {
                "type": "float"
            },
            "coordinates": {
                "type": "geo_point"
            }
        }
    }

    idx_client.put_mapping(doc_type=stations_mapping, index=[stations_index], body=mapping)
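A document matching the mapping above might look like the following sketch (the index/type names reuse the module-level variables; the field values are purely illustrative):

es.index(index=stations_index, doc_type=stations_mapping, body={
    "name": "Jungfraujoch",
    "link": "https://example.org/stations/jungfraujoch",
    "elevation": 3571.0,
    "coordinates": {"lat": 46.5475, "lon": 7.9851},  # geo_point as lat/lon object
})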
Example #53
 def remove_log_indices():
     es = elasticsearch.Elasticsearch()
     from elasticsearch.client import IndicesClient
     es_index = IndicesClient(es)
     log_index_pattern = '{0}*'.format(LOG_INDICES_PREFIX)
     if es_index.exists(log_index_pattern):
         logger.info(
             "Elasticsearch indices '{0}' already exist and "
             "will be deleted".format(log_index_pattern))
         try:
             es_index.delete(log_index_pattern)
             logger.info('Verifying Elasticsearch index was deleted...')
             deadline = time.time() + 45
             while es_index.exists(log_index_pattern):
                 if time.time() > deadline:
                      raise RuntimeError(
                          'Elasticsearch index was not deleted after '
                          '45 seconds')
                 time.sleep(0.5)
          except BaseException as e:
              logger.warning('Ignoring caught exception on Elasticsearch delete'
                             ' index - {0}: {1}'.format(e.__class__, e))
Example #55
class RedisEsSetupMixin(object):

    def setUp(self):
        self.settings = TEST_SETTINGS_OBJECT
        self.es = get_es(self.settings)
        self.esi = IndicesClient(self.es)

        self.index = self.settings.get("ES_INDEX")

        #create the index firstly
        if self.esi.exists(self.index):
            self.esi.delete(index=self.index)

        self.esi.create(index=self.index)

        mapping_path = os.path.join(SCRAPY_ROOT,
                                 "resources/mappings.json")

        mapping_str = open(mapping_path, "r").read()
        mappings = json.loads(mapping_str)


        for k, v in mappings.items():
            res = self.esi.put_mapping(self.index, k, {k: mappings[k]})
            #print res


        self.redis_conn = get_redis(self.settings)


    def tearDown(self):
        if self.esi.exists(self.index):
            self.esi.delete(index=self.index)
            print("ES INDEX DELETED")

        #remove redis stuff
        self.redis_conn.flushdb()
        print("REDIS DB DELETED")
Example #56
    def setUp(self):
        super(TestESTermAggregationWeightProvider, self).setUp()

        self.es = Elasticsearch(hosts=['localhost:%d' % es_runner.es_state.port])
        self.ic = IndicesClient(self.es)
        self.index = 'es_term_weight_provider_test'
        self.doc_type = 'test-doc'
        self.field = 'text'

        if self.ic.exists(self.index):
            self.ic.delete(self.index)

        self.ic.create(self.index)
        self.es.create(self.index, self.doc_type, {self.field: 'foo'})
        self.es.create(self.index, self.doc_type, {self.field: 'knark'})
        self.es.create(self.index, self.doc_type, {self.field: 'ba'})
        self.es.create(self.index, self.doc_type, {self.field: 'knirk'})
        self.es.create(self.index, self.doc_type, {self.field: 'ba'})
        self.es.create(self.index, self.doc_type, {self.field: 'ba'})
        self.es.create(self.index, self.doc_type, {self.field: 'knark '})
        self.es.create(self.index, self.doc_type, {self.field: 'ba'}, refresh=True)
Example #57
def import_ontology(ontology: lib.obo.Ontology, index_name: str):
    es = elasticsearch.Elasticsearch()

    ies = IndicesClient(es)

    actions = [dict(
        _index=index_name,
        _type=index_name,
        _source=dict(
            id=item.id,
            names=item.names()
        )
    ) for item in ontology.items()]

    if ies.exists(index_name):
        ies.delete(index_name)
    ies.create(index_name)
    return bulk(es, actions=actions)
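bulk() returns a (success_count, errors) tuple, so the function's return value can be checked directly. A usage sketch, assuming an Ontology instance named ontology:

ok, errors = import_ontology(ontology, "obo-terms")
print("indexed %d items, %d errors" % (ok, len(errors)))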
Example #58
    def __init__(self):
        """ setup Neo4j database connection and node labels
            and Elasticsearch mapping attachments index """

        self.db = GraphDatabase(self.db_path)
        self.pdf_documents = self.db.labels.create("PDFDocument")
        self.authors = self.db.labels.create("Author")
        self.keywords = self.db.labels.create("Keyword")

        self.es = Elasticsearch(self.es_cluster)
        self.es_ixc = IndicesClient(self.es)
        self.es_ixc.create(
            index="pdf_documents",
            body={
                'mappings': {
                    'pdf': {
                        'properties': {
                            'url': {'type': "string"},
                            'pdf_file': {'type': "attachment"}
                        }
                    }
                }
            }
        )
Example #59
def main():
    # Define the globals
    global index_names
    global STARTED_TIMESTAMP
    global es
    global es_indices
    try:
        # Initiate the Elasticsearch session using the ES low-level client.
        # By default, nodes are randomized before being added to the pool and round-robin is used for load balancing.
        es = Elasticsearch(ES_HOSTS, timeout=30)
        es_indices = IndicesClient(es)

    except Exception:
        print("Could not connect to elasticsearch!")
        sys.exit(1)

    print("Creating indices.. \n"),
    indices = generate_indices()
    print("Done!\n")

    # Register a mapping definition for each document type.
    print("Put Mapping")
    es_indices.put_mapping(doc_type="_default_", body=mappings_body["_default_"], index="_all")
    for type_name in types:
        es_indices.put_mapping(doc_type=type_name, body=mappings_body[type_name], index="_all")
    print("Done!\n")

    # Retrieve the mapping definition of an index or index/type.
    print("GET Mapping")
    print(json.dumps(es_indices.get_mapping(index=["metrics_0", "metrics_1"],
                                            doc_type=types),
                     sort_keys=True,
                     indent=4,
                     separators=(',', ': ')))
    #print(json.dumps(es_indices.get_settings(index="_all"), sort_keys=True, indent=4, separators=(',', ': ')))
    print("Done!\n")

    # Clean up the created indices by default (CLEANUP defaults to True).
    if CLEANUP:
        print("Cleaning up created indices...")
        cleanup_indices()
        print("Done!\n")