def get_list_of_indexes_to_reindex(full_reindex=False):
    db_names = all_db_names()
    list_of_indexes_out_of_sync = []
    total_submissions = 0
    try:
        for database_name in db_names:
            dbm = get_db_manager(database_name)
            questionnaires = dbm.load_all_rows_in_view('questionnaire')
            if not questionnaires:
                continue
            for row in questionnaires:
                if row['value']['is_registration_model']:
                    continue
                form_model_doc = FormModelDocument.wrap(row["value"])
                if full_reindex or is_mapping_out_of_sync(form_model_doc, dbm):
                    es = Elasticsearch(hosts=[{"host": ELASTIC_SEARCH_HOST, "port": ELASTIC_SEARCH_PORT}])
                    search = Search(using=es, index=dbm.database_name, doc_type=form_model_doc.id)
                    no_of_submissions = search.count()
                    questionnaire_info = dict(db_name=database_name,
                                              questionnaire_id=form_model_doc.id,
                                              name=form_model_doc.name,
                                              no_of_submissions=no_of_submissions)
                    total_submissions += no_of_submissions
                    list_of_indexes_out_of_sync.append(questionnaire_info)
    except Exception as e:
        # Log instead of silently swallowing the error: the original bare `pass` made the
        # function return None on failure, breaking tuple unpacking at the call site.
        logging.exception('Failed while collecting indexes to reindex: %s', e)
    return list_of_indexes_out_of_sync, total_submissions

def load_es_template(apps, schema_editor):
    es = Elasticsearch(hosts=[settings.ES_URL], verify_certs=False)
    # This body is an index template, so it belongs to the indices API;
    # Elasticsearch.put_template targets search templates, not index templates.
    es.indices.put_template(
        name="climate_data_template",
        body={
            "template": "climate_data",
            "mappings": {
                "*": {
                    "properties": {
                        "measurement": {"type": "double"},
                        "tmax": {"type": "double"},
                        "tmin": {"type": "double"},
                        "tmean": {"type": "double"},
                        "tdev": {"type": "double"},
                        "rainfall": {"type": "double"},
                        "sunshine": {"type": "double"},
                        "region": {"type": "keyword"}
                    }
                }
            }
        })

def __init__(self, urls=None, timeout=None, force_new=False, raw_results=False, **kwargs):
    '''
    Creates a new ElasticSearch DSL object. Grabs the ElasticSearch connection from
    the pool if it has already been initialized. Otherwise, creates a new one.

    If no parameters are passed, everything is determined from the Django settings.

    :param urls: A list of URLs, or a single URL string (without a leading `http://`),
        or None to read from settings.
    :param idx: A list of indices or a single string representing an index_name name.
        Is optional. Will be merged with `idx_alias`.
    :param idx_alias: A list of index_name aliases or a single string representing an
        index_name alias, as defined in the settings. Will be merged with `index_name`.
    :param timeout: Timeout used in the connection.
    :param force_new: Set to `True` to force a new elasticsearch connection. Otherwise
        will aggressively reuse any connection with the exact same settings.
    :param **kwargs: Additional settings to pass to the low level elasticsearch client
        and to elasticsearch-dsl-py's Search.
    '''
    Bungiesearch.__load_settings__()

    urls = urls or Bungiesearch.BUNGIE['URLS']
    if not timeout:
        timeout = Bungiesearch.BUNGIE.get('TIMEOUT', Bungiesearch.DEFAULT_TIMEOUT)

    search_keys = ['using', 'index', 'doc_type', 'extra']
    search_settings, es_settings = {}, {}
    for k, v in iteritems(kwargs):
        if k in search_keys:
            search_settings[k] = v
        else:
            es_settings[k] = v

    if not es_settings:
        # If there aren't any provided elasticsearch settings, let's see if they're defined in the settings.
        es_settings = Bungiesearch.BUNGIE.get('ES_SETTINGS', {})

    # Build a caching key used to cache the es_instance for later use
    # (and to retrieve a previously cached es_instance).
    cache_key = Bungiesearch._build_key(urls, timeout, **es_settings)
    es_instance = None
    if not force_new:
        if cache_key in Bungiesearch._cached_es_instances:
            es_instance = Bungiesearch._cached_es_instances[cache_key]

    if not es_instance:
        es_instance = Elasticsearch(urls, timeout=timeout, **es_settings)
        Bungiesearch._cached_es_instances[cache_key] = es_instance

    if 'using' not in search_settings:
        search_settings['using'] = es_instance

    super(Bungiesearch, self).__init__(**search_settings)

    # Creating instance attributes.
    self._only = []  # Stores the exact fields to fetch from the database when mapping.
    self.results = []  # Stores the mapped and unmapped results.
    self._raw_results_only = raw_results

def __init__(self, args):
    self.parse_args(args)
    self.conf = load_rules(self.args.config, use_rule=self.args.rule)
    self.max_query_size = self.conf['max_query_size']
    self.rules = self.conf['rules']
    self.debug = self.args.debug
    self.verbose = self.args.verbose
    self.writeback_index = self.conf['writeback_index']
    self.es_host = self.conf['es_host']
    self.es_port = self.conf['es_port']
    self.run_every = self.conf['run_every']
    self.alert_time_limit = self.conf['alert_time_limit']
    self.old_query_limit = self.conf['old_query_limit']
    self.alerts_sent = 0
    self.num_hits = 0
    self.current_es = None
    self.current_es_addr = None
    self.buffer_time = self.conf['buffer_time']
    self.silence_cache = {}
    self.rule_hashes = get_rule_hashes(self.conf)
    self.writeback_es = Elasticsearch(host=self.es_host, port=self.es_port)

    if self.debug:
        self.verbose = True

    if self.verbose:
        logging.getLogger().setLevel(logging.INFO)

    for rule in self.rules:
        rule = self.init_rule(rule)

    if self.args.silence:
        self.silence()

def connect():
    global _es
    if _es is None:
        server = config.get('elasticsearch_host', 'localhost') + ':9200'
        auth = config.get('elastic_search_basic_auth', None)
        # Only pass http_auth when credentials are actually configured.
        kwargs = {'http_auth': auth} if auth else {}
        _es = Elasticsearch(server, **kwargs)
    return _es

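# A minimal usage sketch for the lazy connect() helper above, assuming the module
# defines `_es = None` and a dict-like `config`; the names and values shown here are
# illustrative, not taken from the original project.
_es = None
config = {'elasticsearch_host': 'localhost'}  # hypothetical settings mapping

es = connect()           # first call builds the client (localhost:9200, no auth)
assert connect() is es   # subsequent calls return the cached instance
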
def main():
    # raw_input is Python 2; on Python 3 use input() instead.
    es_host = raw_input("Elasticsearch host: ")
    es_port = raw_input("Elasticsearch port: ")
    db_name = raw_input("Dashboard name: ")
    send_get_body_as = raw_input("Method for querying Elasticsearch[GET]: ") or 'GET'
    es = Elasticsearch(host=es_host, port=es_port, send_get_body_as=send_get_body_as)

    query = {'query': {'term': {'_id': db_name}}}
    res = es.search(index='kibana-int', doc_type='dashboard', body=query,
                    _source_include=['dashboard'])
    if not res['hits']['hits']:
        print("No dashboard %s found" % (db_name))
        exit()

    db = json.loads(res['hits']['hits'][0]['_source']['dashboard'])
    config_filters = filters_from_dashboard(db)

    print("\nPartial Config file")
    print("-----------\n")
    print("name: %s" % (db_name))
    print("es_host: %s" % (es_host))
    print("es_port: %s" % (es_port))
    print("filter:")
    print(yaml.safe_dump(config_filters))

def configure_client(self):
    """Instantiate and configure the ElasticSearch client.

    It simply takes the given HOSTS list and uses PARAMS as the keyword arguments of
    the ElasticSearch class.

    The client's transport_class is given by the class attribute ``transport_class``,
    and the connection class used by the transport class is given by the class
    attribute ``connection_class``.

    An ``ImproperlyConfigured`` exception is raised if any of these elements is
    undefined.
    """
    hosts = self.server['HOSTS']
    params = self.server['PARAMS']

    if not self.transport_class:
        raise ImproperlyConfigured(
            'Djangoes backend %r is not properly configured: '
            'no transport class provided' % self.__class__)

    if not self.connection_class:
        raise ImproperlyConfigured(
            'Djangoes backend %r is not properly configured: '
            'no connection class provided' % self.__class__)

    # pylint: disable=star-args
    self.client = Elasticsearch(hosts,
                                transport_class=self.transport_class,
                                connection_class=self.connection_class,
                                **params)

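# A hedged sketch (not the original djangoes source) of the pieces configure_client()
# expects on its backend class: a `server` dict with HOSTS and PARAMS, plus the
# transport_class / connection_class attributes. Transport and Urllib3HttpConnection
# are elasticsearch-py's stock classes; ExampleBackend and its values are illustrative.
from elasticsearch.transport import Transport
from elasticsearch.connection import Urllib3HttpConnection

class ExampleBackend(object):
    transport_class = Transport
    connection_class = Urllib3HttpConnection

    def __init__(self):
        self.server = {
            'HOSTS': ['localhost:9200'],
            'PARAMS': {'timeout': 10},
        }
        self.client = None
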
def get(self, request):
    database_name = get_database_name(request.user)
    search_text = lower(request.GET["term"] or "")
    es = Elasticsearch(hosts=[{"host": ELASTIC_SEARCH_HOST, "port": ELASTIC_SEARCH_PORT}])
    search = Search(using=es, index=database_name, doc_type="reporter")
    search = search.extra(**{"size": "10"})
    resp = []
    if search_text:
        query_text_escaped = ElasticUtilsHelper().replace_special_chars(search_text)
        query_fields = ["name", "name_value", "name_exact",
                        "short_code", "short_code_exact", "short_code_value"]
        search = search.query("query_string", query=query_text_escaped, fields=query_fields)
        search_results = search.execute()
        resp = [{"id": result.short_code, "label": self.get_label(result)}
                for result in search_results.hits]
    return HttpResponse(json.dumps(resp))

def __init__(self, hostName, portNum):
    self.host = hostName
    self.port = portNum
    try:
        # Quick reachability check before creating the client; a down cluster raises
        # ConnectionError rather than returning an empty body, so catch it explicitly.
        requests.get('http://' + self.host + ':' + self.port)
        # Connect to cluster
        self.es = Elasticsearch([{'host': self.host, 'port': self.port}])
    except requests.exceptions.ConnectionError:
        print("Please turn on elasticsearch")

def get_instance():
    if ESLowLevelClient.__es is None:
        with ESLowLevelClient.__es_lock:
            if ESLowLevelClient.__es is None:
                ESLowLevelClient.__es = Elasticsearch(['localhost'], port=9200, maxsize=25)
    return ESLowLevelClient.__es

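# A hedged sketch of the class scaffolding the double-checked-locking accessor above
# relies on. Because `__es` is name-mangled, the accessor must live inside the class
# body for the attribute lookups to resolve; the class shown here is an illustrative
# reconstruction, not the original source.
import threading
from elasticsearch import Elasticsearch

class ESLowLevelClient:
    __es = None                    # cached low-level client, shared by all callers
    __es_lock = threading.Lock()   # guards the one-time construction

    @staticmethod
    def get_instance():
        if ESLowLevelClient.__es is None:
            with ESLowLevelClient.__es_lock:
                if ESLowLevelClient.__es is None:
                    ESLowLevelClient.__es = Elasticsearch(['localhost'], port=9200, maxsize=25)
        return ESLowLevelClient.__es
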
def get_all_terms(self, args):
    """ Performs a terms aggregation for each field to get every existing term. """
    self.es = Elasticsearch(host=self.rules['es_host'],
                            port=self.rules['es_port'],
                            timeout=self.rules.get('es_conn_timeout', 50))
    window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
    field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
    query_template = {"aggs": {"values": {"terms": field_name}}}
    if args and args.start:
        end = ts_to_dt(args.start)
    else:
        end = ts_now()
    start = end - window_size
    if self.rules.get('use_strftime_index'):
        index = format_index(self.rules['index'], start, end)
    else:
        index = self.rules['index']
    time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
    query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
    query = {'aggs': {'filtered': query_template}}

    for field in self.fields:
        # For composite keys, we will need to perform sub-aggregations
        if type(field) == list:
            level = query_template['aggs']
            # Iterate on each part of the composite key and add a sub aggs clause to the elastic search query
            for i, sub_field in enumerate(field):
                level['values']['terms']['field'] = sub_field
                if i < len(field) - 1:
                    # If we have more fields after the current one, then set up the next nested structure
                    level['values']['aggs'] = {'values': {'terms': copy.deepcopy(field_name)}}
                    level = level['values']['aggs']
        else:
            # For non-composite keys, only a single agg is needed
            field_name['field'] = field
        res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
        if 'aggregations' in res:
            buckets = res['aggregations']['filtered']['values']['buckets']
            if type(field) == list:
                # For composite keys, make the lookup based on all fields
                # Make it a tuple since it can be hashed and used in dictionary lookups
                self.seen_values[tuple(field)] = []
                for bucket in buckets:
                    # We need to walk down the hierarchy and obtain the value at each level
                    self.seen_values[tuple(field)] += self.flatten_aggregation_hierarchy(bucket)
                # If we don't have any results, it could either be because of the absence of any baseline data
                # OR it may be because the composite key contained a non-primitive type. Either way, give the
                # end-users a heads up to help them debug what might be going on.
                if not self.seen_values[tuple(field)]:
                    elastalert_logger.warning((
                        'No results were found from all sub-aggregations. This can either indicate that there is '
                        'no baseline data OR that a non-primitive field was used in a composite key.'
                    ))
            else:
                keys = [bucket['key'] for bucket in buckets]
                self.seen_values[field] = keys
                elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
        else:
            self.seen_values[field] = []
            elastalert_logger.info('Found no values for %s' % (field))

def _engine():
    """:rtype: elasticsearch.Elasticsearch"""
    global _elastic
    if not _elastic:
        _elastic = Elasticsearch([{"host": settings.ELASTIC_HOST,
                                   "port": int(settings.ELASTIC_PORT)}])
    return _elastic

def __init__(self, index_name, index_type, ip="127.0.0.1"):
    '''
    @param index_name: name of the index
    @param index_type: mapping type of the index
    '''
    self.index_name = index_name
    self.index_type = index_type
    self.es = Elasticsearch([ip])

def populate_elastic_search(request=None, project_id=None):
    # 1. Create a tag from "project_id" + "type" + "tag".
    # 2. Get instances from the all-region cache.
    # 3. Generate an index for each project.
    # 4. List the tag in the respective project index and doc type.
    project_id = project_id if project_id \
        else json.loads(request.session['project_id'])
    index_name = elastic_cache_key(project_id, 'ec2')
    ebs_index_name = elastic_cache_key(project_id, 'ebs')
    elb_index_name = elastic_cache_key(project_id, 'elb')
    eip_index_name = elastic_cache_key(project_id, 'eip')
    vpc_index_name = elastic_cache_key(project_id, 'vpc')
    subnet_index_name = elastic_cache_key(project_id, 'subnet')
    security_group_index_name = elastic_cache_key(project_id, 'security_group')
    client = Elasticsearch(hosts=settings.ELASTIC_SEARCH_NODES)
    try:
        # First try to delete the indexes for this project if they already exist.
        client.indices.delete(index=[index_name, ebs_index_name, elb_index_name,
                                     eip_index_name, vpc_index_name,
                                     security_group_index_name, subnet_index_name])
    except TransportError as e:
        LOG.error("Error while deleting the index {0} error : "
                  "{1}".format(index_name, e))
    try:
        obj_list = []
        obj_list.extend(populate_ec2_indexes(request=request, project_id=project_id))
        obj_list.extend(populate_ebs_indexes(request=request, project_id=project_id))
        obj_list.extend(populate_elb_indexes(request=request, project_id=project_id))
        obj_list.extend(populate_eip_indexes(request=request, project_id=project_id))
        obj_list.extend(populate_vpc_indexes(request=request, project_id=project_id))
        obj_list.extend(populate_subnet_indexes(request=request, project_id=project_id))
        obj_list.extend(populate_security_group_indexes(request=request, project_id=project_id))
        if obj_list:
            # Index into Elasticsearch in bulk.
            elastic_index_res = helpers.bulk(client, obj_list, stats_only=True)
            LOG.info("Indexed {0} items Failed {1} items".format(
                elastic_index_res[0], elastic_index_res[1]))
    except Exception as e:
        LOG.error("Error while indexing project {0} error {1}".format(project_id, e))

def search_fuzzy(request=None, project_id=None):
    project_id = project_id if project_id \
        else json.loads(request.session['project_id'])
    index_name = elastic_cache_key(project_id, 'ec2')
    ebs_index_name = elastic_cache_key(project_id, 'ebs')
    elb_index_name = elastic_cache_key(project_id, 'elb')
    eip_index_name = elastic_cache_key(project_id, 'eip')
    vpc_index_name = elastic_cache_key(project_id, 'vpc')
    subnet_index_name = elastic_cache_key(project_id, 'subnet')
    security_group_index_name = elastic_cache_key(project_id, 'security_group')
    st = request.GET.get('st', None)
    client = Elasticsearch(hosts=settings.ELASTIC_SEARCH_NODES)
    query = {
        "query": {
            "query_string": {
                "fields": ["title"],
                "query": "*" + st + "*",
            }
        },
    }
    indexes = [index_name, ebs_index_name, elb_index_name, eip_index_name,
               vpc_index_name, subnet_index_name, security_group_index_name]
    doc_types = ["instance_id", "name_title", "prip_title", "puip_title", "ebs", "eip",
                 "elb", "vpc", "subnet", "security_group_id", "security_group_name"]
    total = client.search(index=indexes, doc_type=doc_types, body=query,
                          ignore_unavailable=True)['hits']['total']
    # Get the total number of hits and set the size parameter equal to that, so the
    # second query returns all results.
    # ToDo Discuss and Optimize query
    query['size'] = total
    search_results = client.search(index=indexes, doc_type=doc_types, body=query,
                                   ignore_unavailable=True)
    return search_results

class Mysql2Es():
    config = {
        "db": {
            "host": "192.168.0.196",
            "user": "******",
            "passwd": "xsycommercial123",
            "db": "prism1",
            "charset": "utf8"
        },
        "max_query": "select max(id) from company",
        "query": "select id,name,company_org_type,reg_status from company",
        "index": {
            "host": ["http://192.168.0.196:9200", "http://192.168.0.197:9200", "http://192.168.0.198:9200"],
            "_index": "company0606",
            "_type": "company"
        },
        "action": "index",
        "_id": "id"
    }

    def __init__(self, start_id=0, max_id=100000, step=10000, id_file=None, config=None):
        if config is not None:
            self.config = json.loads(open(config).read())
        # ===================================================================
        # connect to mysql
        # ===================================================================
        self.db = None
        try:
            self.db = MySQLdb.connect(**self.config["db"])
        except MySQLdb.Error as e:
            print("Error %d: %s" % (e.args[0], e.args[1]))
            sys.exit(1)
        # ===================================================================
        # query select from table
        # ===================================================================
        self.cursor = self.db.cursor()
        # self.cursor.execute(self.config["max_query"])
        self.start_id = start_id
        self.max_id = max_id
        self.step = step
        self.id_file = id_file
        self.limit = 50000
        self.action = self.config['action']
        self.metadata = {"_index": self.config["index"]["_index"],
                         "_type": self.config["index"]["_type"]}
        self.es = Elasticsearch(self.config["index"]["host"])
        self.mutex = threading.Lock()
        self.thread_num = 0
        self.db_data = []
        self.complete = False

def get_es(self):
    if self.es is None:
        ssl_url = self.es_url.startswith('https')
        if ssl_url:
            # TODO add valid cert in ES setup
            logger.warning('ES does not use cert validation.')
        self.es = Elasticsearch([self.es_url], verify_certs=False)
    return self.es

def run_rule(self, rule):
    """ Run a rule including querying and alerting on results.

    :param rule: The rule configuration.
    :return: The number of matches that the rule produced.
    """
    elastalert_logger.info('Start to run rule: %s', rule.get('name'))
    # Run the rule. If querying over a large time period, split it up into segments
    self.num_hits = 0
    rule_request = rule.get("input").get("search").get("request")
    if rule_request.get("elastic_host", None) is not None and rule_request.get("elastic_port", None) is not None:
        self.current_es = Elasticsearch(host=rule_request.get("elastic_host"),
                                        port=rule_request.get("elastic_port"))
    else:
        self.current_es = self.new_elasticsearch(self.global_config)

    self.run_query(rule)

    # Process any new matches
    num_matches = len(rule['type'].matches)
    while rule['type'].matches:
        match = rule['type'].matches.pop(0)
        # Assumption: no query_key grouping here, so the realert/silence key is just the
        # rule name (the original code referenced an undefined `key`).
        key = ''
        # if self.is_silenced(rule['name'] + key) or self.is_silenced(rule['name']):
        #     elastalert_logger.info('Ignoring match for silenced rule %s%s' % (rule['name'], key))
        #     continue
        if rule.get('realert'):
            next_alert, exponent = self.next_alert_time(rule, rule['name'] + key, ts_now())
            self.set_realert(rule['name'] + key, next_alert, exponent)

        # If no aggregation, alert immediately
        # if not rule['aggregation']:
        #     self.alert([match], rule)
        #     continue
        self.alert([match], rule)

        # Add it as an aggregated match
        # self.add_aggregated_alert(match, rule)

    # Mark this endtime for next run's start
    # rule['previous_endtime'] = endtime
    # time_taken = time.time() - run_start
    return num_matches

def main(host, port, index, type, chunk_size, geojson_file):
    def _charge_doc():
        for feature in load_geojson(geojson_file):
            yield {
                '_index': index,
                '_type': type,
                '_source': feature
            }

    es = Elasticsearch(host=host, port=port)
    helpers.bulk(es, _charge_doc(), chunk_size=chunk_size, request_timeout=6000)

def elasticsearch_client(conf):
    """ returns an Elasticsearch instance configured using an es_conn_config """
    es_conn_conf = build_es_conn_config(conf)
    return Elasticsearch(host=es_conn_conf['es_host'],
                         port=es_conn_conf['es_port'],
                         url_prefix=es_conn_conf['es_url_prefix'],
                         use_ssl=es_conn_conf['use_ssl'],
                         verify_certs=es_conn_conf['verify_certs'],
                         connection_class=RequestsHttpConnection,
                         timeout=es_conn_conf['es_conn_timeout'],
                         send_get_body_as=es_conn_conf['send_get_body_as'])

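# A hedged usage sketch for the factory above. build_es_conn_config (referenced but not
# shown here) is assumed to normalize a raw conf dict into the es_* keys the factory
# reads; the literal values below are illustrative.
conf = {
    'es_host': 'localhost',
    'es_port': 9200,
    'use_ssl': False,
    'verify_certs': True,
    'es_conn_timeout': 20,
    'es_url_prefix': '',
    'send_get_body_as': 'GET',
}
es = elasticsearch_client(conf)  # conf is first normalized by build_es_conn_config
print(es.info())
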
def handle_error(self, message, data=None):
    ''' Logs message at error level and writes message, data and traceback to Elasticsearch. '''
    if not self.writeback_es:
        self.writeback_es = Elasticsearch(host=self.es_host, port=self.es_port)
    logging.error(message)
    body = {'message': message}
    tb = traceback.format_exc()
    body['traceback'] = tb.strip().split('\n')
    if data:
        body['data'] = data
    self.writeback('elastalert_error', body)

def main():
    parser = ArgumentParser()
    parser.add_argument('-e', '--elasticsearch-server', default='localhost:9200')
    parser.add_argument('-d', '--dataset')
    parser.add_argument('-s', '--sections')
    opts = parser.parse_args()

    es_hosts = [opts.elasticsearch_server]
    dataset_name = opts.dataset
    dataset_sections = opts.sections

    es = Elasticsearch(hosts=es_hosts, timeout=120)

    if dataset_name == 'newsgroups':
        dataset = NewsgroupsDataset()
    elif dataset_name == 'aviskorpus':
        sections = None
        sources = None
        if dataset_sections:
            try:
                sections, sources = dataset_sections.split('-')
                sections = [int(s) for s in sections.split('|')]
                sources = [s for s in sources.split('|')]
            except Exception:
                logging.error('Malformed section specification "%s" ...' % dataset_sections)
                sys.exit(1)
        dataset = AviskorpusDataset(sections=sections, sources=sources)
    elif dataset_name == 'ndt':
        sections = None
        lang = None
        if dataset_sections:
            try:
                sections, lang = dataset_sections.split('-')
                sections = [int(s) for s in sections.split('|')]
                lang = [s for s in lang.split('|')]
            except Exception:
                logging.error('Malformed section specification "%s" ...' % dataset_sections)
                sys.exit(1)
        dataset = NDTDataset(lang=lang, sections=sections)
    else:
        logging.error('Unknown dataset %s ...' % dataset_name)
        sys.exit(1)

    dataset.install(es)

def filter(self, qs, value):
    client = Elasticsearch([settings.ELASTICSEARCH_HOST])
    value = value.lower()
    search_query = {
        "bool": {
            "must_not": [
                # Exclude is_published=False from the results
                {"term": {"is_published": False}}
            ],
            "should": [
                {
                    "simple_query_string": {
                        "fields": ["category_name"],
                        "quote_field_suffix": ".exact",
                        "query": value
                    }
                },
            ]
        }
    }
    s = Search(using=client, index='category') \
        .query(search_query) \
        .sort("_score", "-views") \
        .extra(size=self.max_result, from_=0)
    hits_list = []
    items = s.execute()
    if items:
        for item in items:
            hits_list.append(item.meta.id)
        hits_order = Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hits_list)])
        qs = qs.filter(id__in=hits_list).order_by(hits_order)
    else:
        qs = qs.none()
        # TODO: fallback?
        # bits = value.split(' ')
        # search_clauses = reduce(operator.and_,
        #                         [Q(title__icontains=v) for v in bits])
        # unpublished = Category.objects.get_queryset_descendants(
        #     Category.objects.filter(is_published=False), include_self=True)
        # qs = (qs
        #       .exclude(pk__in=unpublished)
        #       .filter(search_clauses)
        #       .order_by('-views'))
    return qs[:self.max_result]

def _connect(self):
    """ connect to a member of the ElasticSearch cluster """
    try:
        if self.local_env:
            self.es = Elasticsearch([{'host': self.host, 'port': self.port}])
        else:
            self.es = Elasticsearch([{'host': self.host, 'port': self.port}],
                                    sniff_on_start=True,
                                    sniff_on_connection_fail=True,
                                    sniffer_timeout=self.timeout)
        self.idx = IndicesClient(self.es)
        return
    except ConnectionError as e:
        return ElasticSearchError.no_host_available(self.host, self.port)
    except Exception as e:
        (type_e, value, traceback_prev) = exc_info()
        backtrace = extract_tb(traceback_prev)
        return ElasticSearchError.unknown_exception(backtrace, str(e))

def createElasticsearchClient(conf):
    auth = None
    username = os.environ.get('ELASTICSEARCH_USERNAME')
    password = os.environ.get('ELASTICSEARCH_PASSWORD')
    if username and password:
        auth = (username, password)
    return Elasticsearch(
        host=conf['elasticsearch']['host'],
        port=conf['elasticsearch']['port'],
        url_prefix=conf['elasticsearch']['urlPrefix'],
        use_ssl=conf['elasticsearch']['sslEnabled'],
        verify_certs=conf['elasticsearch']['sslStrictEnabled'],
        http_auth=auth,
        timeout=conf['elasticsearch']['timeoutSeconds'])

def test_repr_truncates_host_to_10(self):
    hosts = [{"host": "es" + str(i)} for i in range(20)]
    self.assertEquals(
        '<Elasticsearch(%r)>' % [{'host': 'es0'}, {'host': 'es1'}, {'host': 'es2'},
                                 {'host': 'es3'}, {'host': 'es4'}, '...'],
        repr(Elasticsearch(hosts)))

def create_es_publisher_job(*, elasticsearch, host, neo4j, **kwargs):
    """
    :param elasticsearch_index_alias: alias for Elasticsearch used in
        amundsensearchlibrary/search_service/config.py as an index
    :param elasticsearch_doc_type_key: name the ElasticSearch index is prepended with.
        Defaults to `table` resulting in `table_search_index`
    :param model_name: the Databuilder model class used in transporting between Extractor and Loader
    :param cypher_query: Query handed to the `Neo4jSearchDataExtractor` class, if None is given (default)
        it uses the `Table` query baked into the Extractor
    :param elasticsearch_mapping: Elasticsearch field mapping "DDL" handed to the `ElasticsearchPublisher`
        class, if None is given (default) it uses the `Table` query baked into the Publisher
    """
    elasticsearch_client = Elasticsearch([{'host': elasticsearch["host"]}])
    # unique name of new index in Elasticsearch
    elasticsearch_new_index_key = 'tables' + str(uuid.uuid4())
    data_path = host["es_data_path"]

    job_config = ConfigFactory.from_dict({
        f'extractor.search_data.extractor.neo4j.{Neo4jExtractor.GRAPH_URL_CONFIG_KEY}': neo4j["endpoint"],
        f'extractor.search_data.extractor.neo4j.{Neo4jExtractor.MODEL_CLASS_CONFIG_KEY}':
            'databuilder.models.table_elasticsearch_document.TableESDocument',
        f'extractor.search_data.extractor.neo4j.{Neo4jExtractor.NEO4J_AUTH_USER}': neo4j["user"],
        f'extractor.search_data.extractor.neo4j.{Neo4jExtractor.NEO4J_AUTH_PW}': neo4j["password"],
        f'loader.filesystem.elasticsearch.{FSElasticsearchJSONLoader.FILE_PATH_CONFIG_KEY}': data_path,
        f'loader.filesystem.elasticsearch.{FSElasticsearchJSONLoader.FILE_MODE_CONFIG_KEY}': 'w',
        f'publisher.elasticsearch.{ElasticsearchPublisher.FILE_PATH_CONFIG_KEY}': data_path,
        f'publisher.elasticsearch.{ElasticsearchPublisher.FILE_MODE_CONFIG_KEY}': 'r',
        f'publisher.elasticsearch.{ElasticsearchPublisher.ELASTICSEARCH_CLIENT_CONFIG_KEY}': elasticsearch_client,
        f'publisher.elasticsearch.{ElasticsearchPublisher.ELASTICSEARCH_NEW_INDEX_CONFIG_KEY}': elasticsearch_new_index_key,
        f'publisher.elasticsearch.{ElasticsearchPublisher.ELASTICSEARCH_DOC_TYPE_CONFIG_KEY}': 'table',
        f'publisher.elasticsearch.{ElasticsearchPublisher.ELASTICSEARCH_ALIAS_CONFIG_KEY}': 'table_search_index',
    })

    task = DefaultTask(loader=FSElasticsearchJSONLoader(),
                       extractor=Neo4jSearchDataExtractor(),
                       transformer=NoopTransformer())

    job = DefaultJob(conf=job_config, task=task, publisher=ElasticsearchPublisher())
    return job

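# A hedged usage sketch: Databuilder jobs built this way are typically executed with
# launch(); the elasticsearch/host/neo4j dicts below are illustrative placeholders, not
# values from the original deployment.
job = create_es_publisher_job(
    elasticsearch={'host': 'localhost'},
    host={'es_data_path': '/tmp/amundsen/search_data.json'},
    neo4j={'endpoint': 'bolt://localhost:7687', 'user': 'neo4j', 'password': 'test'},
)
job.launch()
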
def update():
    for doc in scan(Elasticsearch(ES_DESTINATION),
                    query={"query": {"match_all": {}}},
                    index="smartapi_docs",
                    scroll="60m"):
        print(doc["_id"])
        smartapi = SmartAPI.get(doc["_id"])
        print(smartapi.check())
        print(smartapi.refresh())
        if smartapi.webdoc.status == 299:
            smartapi.webdoc._status = 200  # change status not reliable during migration
        smartapi.save()
        print()

def autocomplete_elastic_tags(request):
    client = Elasticsearch(hosts=settings.ELASTIC_SEARCH_NODES)
    index_name = elastic_cache_key(request.session['project_id'], 'ec2')
    if not client.indices.exists(index_name):
        try:
            populate_elastic_search(project_id=request.session['project_id'])
        except Exception:
            import traceback
            LOG.error("Cannot build index: %s" % traceback.format_exc())
            raise Exception('Cache values not present')
    return dict(id='ins_id',
                name='ins_name',
                private_ip_address='private_ip_address',
                ip_address='ip_address')

def get_dashboard(self, rule, db_name):
    """ Download dashboard which matches use_kibana_dashboard from elasticsearch. """
    es = Elasticsearch(host=rule['es_host'], port=rule['es_port'])
    if not db_name:
        raise EAException("use_kibana_dashboard undefined")
    query = {'query': {'term': {'_id': db_name}}}
    try:
        res = es.search(index='kibana-int', doc_type='dashboard', body=query,
                        _source_include=['dashboard'])
    except ElasticsearchException as e:
        raise EAException("Error querying for dashboard: %s" % (e))
    if res['hits']['hits']:
        return json.loads(res['hits']['hits'][0]['_source']['dashboard'])
    else:
        raise EAException("Could not find dashboard named %s" % (db_name))