Example #1
def get_list_of_indexes_to_reindex(full_reindex=False):
    db_names = all_db_names()
    list_of_indexes_out_of_sync = []
    total_submissions = 0
    try:
        for database_name in db_names:
            dbm = get_db_manager(database_name)
            questionnaires = dbm.load_all_rows_in_view('questionnaire')
            if not questionnaires:
                continue
            for row in questionnaires:
                if row['value']['is_registration_model']:
                    continue

                form_model_doc = FormModelDocument.wrap(row["value"])
                if full_reindex or is_mapping_out_of_sync(form_model_doc, dbm):
                    es = Elasticsearch(hosts=[{
                        "host": ELASTIC_SEARCH_HOST,
                        "port": ELASTIC_SEARCH_PORT
                    }])
                    search = Search(using=es,
                                    index=dbm.database_name,
                                    doc_type=form_model_doc.id)
                    no_of_submissions = search.count()
                    questionnaire_info = dict(
                        db_name=database_name,
                        questionnaire_id=form_model_doc.id,
                        name=form_model_doc.name,
                        no_of_submissions=no_of_submissions)
                    total_submissions += no_of_submissions
                    list_of_indexes_out_of_sync.append(questionnaire_info)
        return list_of_indexes_out_of_sync, total_submissions
    except Exception:
        # On failure, return the partial results instead of implicitly
        # returning None, so callers always get the (indexes, count) tuple.
        return list_of_indexes_out_of_sync, total_submissions
Example #2
def load_es_template(apps, schema_editor):
    es = Elasticsearch(hosts=[settings.ES_URL], verify_certs=False)
    es.put_template(id="climate_data_template", body=json.dumps({
        "template": "climate_data",
        "mappings": {
            "*": {
                "properties": {
                    "measurement": {
                        "type": "double"
                    },
                    "tmax": {
                        "type": "double"
                    },
                    "tmin": {
                        "type": "double"
                    },
                    "tmean": {
                        "type": "double"
                    },
                    "tdev": {
                        "type": "double"
                    },
                    "rainfall": {
                        "type": "double"
                    },
                    "sunshine": {
                        "type": "double"
                    },
                    "region": {
                        "type": "keyword"
                    }
                }
            }
        }
    }))
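# A minimal sketch (not from the source): registering the same mapping through
# elasticsearch-py's indices API, assuming a local cluster; trimmed to two of
# the fields above for brevity.
from elasticsearch import Elasticsearch

es = Elasticsearch(["localhost:9200"], verify_certs=False)
es.indices.put_template(name="climate_data_template", body={
    "template": "climate_data",
    "mappings": {"*": {"properties": {"measurement": {"type": "double"},
                                      "region": {"type": "keyword"}}}},
})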
Example #3
    def __init__(self,
                 urls=None,
                 timeout=None,
                 force_new=False,
                 raw_results=False,
                 **kwargs):
        '''
        Creates a new ElasticSearch DSL object. Grabs the ElasticSearch connection from the pool
        if it has already been initialized. Otherwise, creates a new one.

        If no parameters are passed, everything is determined from the Django settings.

        :param urls: A list of URLs, or a single URL string (without the leading `http://`), or None to read from settings.
        :param idx: A list of indices or a single index name. Optional; will be merged with `idx_alias`.
        :param idx_alias: A list of index aliases or a single index alias, as defined in the settings. Will be merged with `idx`.
        :param timeout: Timeout used for the connection.
        :param force_new: Set to `True` to force a new elasticsearch connection. Otherwise any cached connection with the exact same settings is reused.
        :param **kwargs: Additional settings to pass to the low-level elasticsearch client and to elasticsearch-dsl-py's Search.
        '''

        Bungiesearch.__load_settings__()

        urls = urls or Bungiesearch.BUNGIE['URLS']
        if not timeout:
            timeout = Bungiesearch.BUNGIE.get('TIMEOUT',
                                              Bungiesearch.DEFAULT_TIMEOUT)

        search_keys = ['using', 'index', 'doc_type', 'extra']
        search_settings, es_settings = {}, {}
        for k, v in iteritems(kwargs):
            if k in search_keys:
                search_settings[k] = v
            else:
                es_settings[k] = v

        if not es_settings:
            # If there aren't any provided elasticsearch settings, let's see if it's defined in the settings.
            es_settings = Bungiesearch.BUNGIE.get('ES_SETTINGS', {})

        # Build a caching key used to store the es_instance for later use (and to retrieve a previously cached es_instance).
        cache_key = Bungiesearch._build_key(urls, timeout, **es_settings)
        es_instance = None
        if not force_new:
            if cache_key in Bungiesearch._cached_es_instances:
                es_instance = Bungiesearch._cached_es_instances[cache_key]

        if not es_instance:
            es_instance = Elasticsearch(urls, timeout=timeout, **es_settings)
            Bungiesearch._cached_es_instances[cache_key] = es_instance

        if 'using' not in search_settings:
            search_settings['using'] = es_instance

        super(Bungiesearch, self).__init__(**search_settings)

        # Creating instance attributes.
        self._only = []  # Stores the exact fields to fetch from the database when mapping.
        self.results = []  # Store the mapped and unmapped results.
        self._raw_results_only = raw_results
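# Usage sketch (assumption, not from the source): with BUNGIE defined in the
# Django settings, the constructor above needs no arguments; connection details
# can also be passed explicitly. Values here are illustrative.
#
#   search = Bungiesearch()
#   search = Bungiesearch(urls=['localhost:9200'], timeout=10, force_new=True)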
Example #4
    def __init__(self, args):
        self.parse_args(args)
        self.conf = load_rules(self.args.config, use_rule=self.args.rule)
        self.max_query_size = self.conf['max_query_size']
        self.rules = self.conf['rules']
        self.debug = self.args.debug
        self.verbose = self.args.verbose
        self.writeback_index = self.conf['writeback_index']
        self.es_host = self.conf['es_host']
        self.es_port = self.conf['es_port']
        self.run_every = self.conf['run_every']
        self.alert_time_limit = self.conf['alert_time_limit']
        self.old_query_limit = self.conf['old_query_limit']
        self.alerts_sent = 0
        self.num_hits = 0
        self.current_es = None
        self.current_es_addr = None
        self.buffer_time = self.conf['buffer_time']
        self.silence_cache = {}
        self.rule_hashes = get_rule_hashes(self.conf)

        self.writeback_es = Elasticsearch(host=self.es_host, port=self.es_port)

        if self.debug:
            self.verbose = True

        if self.verbose:
            logging.getLogger().setLevel(logging.INFO)

        for rule in self.rules:
            rule = self.init_rule(rule)

        if self.args.silence:
            self.silence()
Example #5
def connect():
    global _es
    if _es is None:
        server = config.get('elasticsearch_host', 'localhost') + ':9200'
        auth = config.get('elastic_search_basic_auth', None)
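        # The dict comprehension below yields {'http_auth': auth} when basic
        # auth is configured and an empty dict otherwise, so the keyword is
        # only passed when needed.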
        _es = Elasticsearch(server, **{'http_auth': a for a in (auth, ) if a})
    return _es
Example #6
def main():
    es_host = raw_input("Elasticsearch host: ")
    es_port = raw_input("Elasticsearch port: ")
    db_name = raw_input("Dashboard name: ")
    send_get_body_as = raw_input(
        "Method for querying Elasticsearch[GET]: ") or 'GET'
    es = Elasticsearch(host=es_host,
                       port=es_port,
                       send_get_body_as=send_get_body_as)
    query = {'query': {'term': {'_id': db_name}}}
    res = es.search(index='kibana-int',
                    doc_type='dashboard',
                    body=query,
                    _source_include=['dashboard'])
    if not res['hits']['hits']:
        print("No dashboard %s found" % (db_name))
        exit()

    db = json.loads(res['hits']['hits'][0]['_source']['dashboard'])
    config_filters = filters_from_dashboard(db)

    print("\nPartial Config file")
    print("-----------\n")
    print("name: %s" % (db_name))
    print("es_host: %s" % (es_host))
    print("es_port: %s" % (es_port))
    print("filter:")
    print(yaml.safe_dump(config_filters))
Example #7
    def configure_client(self):
        """Instantiate and configure the ElasticSearch client.

        It simply takes the given HOSTS list and uses PARAMS as the keyword
        arguments of the ElasticSearch class.

        The client's transport_class is given by the class attribute
        ``transport_class``, and the connection class used by the transport
        class is given by the class attribute ``connection_class``.

        An ``ImproperlyConfigured`` exception is raised if any of these
        elements is undefined.
        """
        hosts = self.server['HOSTS']
        params = self.server['PARAMS']

        if not self.transport_class:
            raise ImproperlyConfigured(
                'Djangoes backend %r is not properly configured: '
                'no transport class provided' % self.__class__)

        if not self.connection_class:
            raise ImproperlyConfigured(
                'Djangoes backend %r is not properly configured: '
                'no connection class provided' % self.__class__)

        #pylint: disable=star-args
        self.client = Elasticsearch(hosts,
                                    transport_class=self.transport_class,
                                    connection_class=self.connection_class,
                                    **params)
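# Context sketch (assumption, not from the source): the shape of self.server
# that configure_client() consumes; HOSTS feeds the Elasticsearch hosts list
# and PARAMS becomes its keyword arguments. Values are illustrative.
EXAMPLE_SERVER = {
    'HOSTS': ['localhost:9200'],
    'PARAMS': {'timeout': 30, 'max_retries': 2},
}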
Example #8
 def get(self, request):
     database_name = get_database_name(request.user)
     search_text = (request.GET.get("term") or "").lower()
     es = Elasticsearch(hosts=[{
         "host": ELASTIC_SEARCH_HOST,
         "port": ELASTIC_SEARCH_PORT
     }])
     search = Search(using=es, index=database_name, doc_type="reporter")
     search = search.extra(**{"size": "10"})
     resp = []
     if search_text:
         query_text_escaped = ElasticUtilsHelper().replace_special_chars(
             search_text)
         query_fields = [
             "name", "name_value", "name_exact", "short_code",
             "short_code_exact", "short_code_value"
         ]
         search = search.query("query_string",
                               query=query_text_escaped,
                               fields=query_fields)
         search_results = search.execute()
         resp = [{
             "id": result.short_code,
             "label": self.get_label(result)
         } for result in search_results.hits]
     return HttpResponse(json.dumps(resp))
Example #9
    def __init__(self, hostName, portNum):
        self.host = hostName
        self.port = portNum
        try:
            # Probe the HTTP endpoint first; raises ConnectionError if the
            # server is unreachable.
            requests.get('http://' + self.host + ':' + self.port)
            # Connect to cluster
            self.es = Elasticsearch([{'host': self.host, 'port': self.port}])
        except requests.exceptions.ConnectionError:
            print("Please turn on elasticsearch")
Example #10
 def get_instance():
     if ESLowLevelClient.__es is None:
         with ESLowLevelClient.__es_lock:
             if ESLowLevelClient.__es is None:
                 ESLowLevelClient.__es = Elasticsearch(['localhost'],
                                                       port=9200,
                                                       maxsize=25)
     return ESLowLevelClient.__es
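# Context sketch (assumption, not from the source): the class attributes the
# accessor above relies on for its double-checked locking; in the full class,
# get_instance would typically be decorated as a @staticmethod.
import threading

class ESLowLevelClient:
    __es = None                   # cached low-level Elasticsearch client
    __es_lock = threading.Lock()  # guards first-time construction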
Example #11
    def get_all_terms(self, args):
        """ Performs a terms aggregation for each field to get every existing term. """
        self.es = Elasticsearch(host=self.rules['es_host'], port=self.rules['es_port'], timeout=self.rules.get('es_conn_timeout', 50))
        window_size = datetime.timedelta(**self.rules.get('terms_window_size', {'days': 30}))
        field_name = {"field": "", "size": 2147483647}  # Integer.MAX_VALUE
        query_template = {"aggs": {"values": {"terms": field_name}}}
        if args and args.start:
            end = ts_to_dt(args.start)
        else:
            end = ts_now()
        start = end - window_size
        if self.rules.get('use_strftime_index'):
            index = format_index(self.rules['index'], start, end)
        else:
            index = self.rules['index']
        time_filter = {self.rules['timestamp_field']: {'lte': dt_to_ts(end), 'gte': dt_to_ts(start)}}
        query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}}
        query = {'aggs': {'filtered': query_template}}

        for field in self.fields:
            # For composite keys, we will need to perform sub-aggregations
            if type(field) == list:
                level = query_template['aggs']
                # Iterate on each part of the composite key and add a sub aggs clause to the elastic search query
                for i, sub_field in enumerate(field):
                    level['values']['terms']['field'] = sub_field
                    if i < len(field) - 1:
                        # If we have more fields after the current one, then set up the next nested structure
                        level['values']['aggs'] = {'values': {'terms': copy.deepcopy(field_name)}}
                        level = level['values']['aggs']
            else:
                # For non-composite keys, only a single agg is needed
                field_name['field'] = field
            res = self.es.search(body=query, index=index, ignore_unavailable=True, timeout='50s')
            if 'aggregations' in res:
                buckets = res['aggregations']['filtered']['values']['buckets']
                if type(field) == list:
                    # For composite keys, make the lookup based on all fields
                    # Make it a tuple since it can be hashed and used in dictionary lookups
                    self.seen_values[tuple(field)] = []
                    for bucket in buckets:
                        # We need to walk down the hierarchy and obtain the value at each level
                        self.seen_values[tuple(field)] += self.flatten_aggregation_hierarchy(bucket)
                    # If we don't have any results, it could either be because of the absence of any baseline data
                    # OR it may be because the composite key contained a non-primitive type.  Either way, give the
                    # end-users a heads up to help them debug what might be going on.
                    if not self.seen_values[tuple(field)]:
                        elastalert_logger.warning((
                            'No results were found from all sub-aggregations.  This can either indicate that there is '
                            'no baseline data OR that a non-primitive field was used in a composite key.'
                        ))
                else:
                    keys = [bucket['key'] for bucket in buckets]
                    self.seen_values[field] = keys
                    elastalert_logger.info('Found %s unique values for %s' % (len(keys), field))
            else:
                self.seen_values[field] = []
                elastalert_logger.info('Found no values for %s' % (field))
Example #12
def _engine():
    """:rtype: elasticsearch.Elasticsearch"""
    global _elastic
    if not _elastic:
        _elastic = Elasticsearch([{
            "host": settings.ELASTIC_HOST,
            "port": int(settings.ELASTIC_PORT)
        }])
    return _elastic
Example #13
    def __init__(self, index_name, index_type, ip="127.0.0.1"):
        '''
        @param index_name: name of the index
        @param index_type: document type within the index
        '''
        self.index_name = index_name
        self.index_type = index_type

        self.es = Elasticsearch([ip])
Example #14
def populate_elastic_search(request=None, project_id=None):
    # 1. Create a tag from "project_id" + "type" + "tag".
    # 2. Get instances from the cache of every region.
    # 3. Generate an index for each project.
    # 4. List the tags in the respective project index and doc type.
    project_id = project_id if project_id \
        else json.loads(request.session['project_id'])

    index_name = elastic_cache_key(project_id, 'ec2')
    ebs_index_name = elastic_cache_key(project_id, 'ebs')
    elb_index_name = elastic_cache_key(project_id, 'elb')
    eip_index_name = elastic_cache_key(project_id, 'eip')
    vpc_index_name = elastic_cache_key(project_id, 'vpc')
    subnet_index_name = elastic_cache_key(project_id, 'subnet')
    security_group_index_name = elastic_cache_key(project_id, 'security_group')

    client = Elasticsearch(hosts=settings.ELASTIC_SEARCH_NODES)

    try:
        # First try to delete the index for this project if already exists
        client.indices.delete(index=[
            index_name, ebs_index_name, elb_index_name, eip_index_name,
            vpc_index_name, security_group_index_name, subnet_index_name
        ])
    except TransportError as e:
        LOG.error("Error while deleting the index {0} error : "
                  "{1}".format(index_name, e))

    try:
        obj_list = []
        obj_list.extend(
            populate_ec2_indexes(request=request, project_id=project_id))
        obj_list.extend(
            populate_ebs_indexes(request=request, project_id=project_id))
        obj_list.extend(
            populate_elb_indexes(request=request, project_id=project_id))
        obj_list.extend(
            populate_eip_indexes(request=request, project_id=project_id))
        obj_list.extend(
            populate_vpc_indexes(request=request, project_id=project_id))
        obj_list.extend(
            populate_subnet_indexes(request=request, project_id=project_id))
        obj_list.extend(
            populate_security_group_indexes(request=request,
                                            project_id=project_id))

        if obj_list:
            elastic_index_res = helpers.bulk(
                client, obj_list,
                stats_only=True)  # Index elastic search in bulk
            LOG.info("Indexed {0} items Failed {1} items".format(
                elastic_index_res[0], elastic_index_res[1]))

    except Exception as e:
        LOG.error("Error while indexing project {0} error {1}".format(
            project_id, e))
Example #15
def search_fuzzy(request=None, project_id=None):
    project_id = project_id if project_id \
        else json.loads(request.session['project_id'])

    index_name = elastic_cache_key(project_id, 'ec2')
    ebs_index_name = elastic_cache_key(project_id, 'ebs')
    elb_index_name = elastic_cache_key(project_id, 'elb')
    eip_index_name = elastic_cache_key(project_id, 'eip')
    vpc_index_name = elastic_cache_key(project_id, 'vpc')
    subnet_index_name = elastic_cache_key(project_id, 'subnet')
    security_group_index_name = elastic_cache_key(project_id, 'security_group')

    st = request.GET.get('st', None)
    client = Elasticsearch(hosts=settings.ELASTIC_SEARCH_NODES)

    query = {
        "query": {
            "query_string": {
                "fields": ["title"],
                "query": "*" + st + "*",
            }
        },
    }

    total = client.search(index=[
        index_name, ebs_index_name, elb_index_name, eip_index_name,
        vpc_index_name, subnet_index_name, security_group_index_name
    ],
                          doc_type=[
                              "instance_id", "name_title", "prip_title",
                              "puip_title", "ebs", "eip", "elb", "vpc",
                              "subnet", "security_group_id",
                              "security_group_name"
                          ],
                          body=query,
                          ignore_unavailable=True)['hits']['total']

    # Get Total search result and set size parameter equal to that, to get all results
    # ToDo Discuss and Optimize
    query['size'] = total

    search_results = client.search(index=[
        index_name, ebs_index_name, elb_index_name, eip_index_name,
        vpc_index_name, subnet_index_name, security_group_index_name
    ],
                                   doc_type=[
                                       "instance_id", "name_title",
                                       "prip_title", "puip_title", "ebs",
                                       "eip", "elb", "vpc", "subnet",
                                       "security_group_id",
                                       "security_group_name"
                                   ],
                                   body=query,
                                   ignore_unavailable=True)
    return search_results
Example #16
class Mysql2Es():
    config = {
        "db":{
            "host":"192.168.0.196",
            "user":"******",
            "passwd":"xsycommercial123",
            "db":"prism1",
            "charset":"utf8"
            },
        "max_query":"select max(id) from company",
        "query":"select id,name,company_org_type,reg_status from company",
        "index":{
            "host":["http://192.168.0.196:9200","http://192.168.0.197:9200","http://192.168.0.198:9200"],
            "_index":"company0606",
            "_type":"company"
            },    
        "action":"index",
        "_id":"id"
        }


    def __init__(self,start_id=0,max_id=100000,step=10000,id_file=None,config=None):
        if config != None:
            self.config = json.loads(open(config).read())
        #===================================================================
        # connect to mysql
        #===================================================================
        self.db = None 
        try:
            self.db = MySQLdb.connect(**self.config["db"])
        except MySQLdb.Error as e:
            print("Error %d: %s" % (e.args[0], e.args[1]))
            sys.exit (1)
        
        #===================================================================
        # query select from table
        #===================================================================
        
        self.cursor = self.db.cursor()   
        #self.cursor.execute(self.config["max_query"])
        self.start_id = start_id
        self.max_id = max_id
        self.step = step
        self.id_file = id_file
        self.limit = 50000
        
        self.action = self.config['action']
        self.metadata = {"_index":self.config["index"]["_index"],"_type":self.config["index"]["_type"]}
        
        self.es = Elasticsearch(self.config["index"]["host"])
        
        self.mutex = threading.Lock()
        self.thread_num = 0
        self.db_data=[]
        self.complete = False
Example #17
    def get_es(self):
        if self.es is None:
            ssl_url = self.es_url.startswith('https')

            if ssl_url:
                # TODO add valid cert in ES setup
                logger.warning('ES does not use cert validation.')

            self.es = Elasticsearch([self.es_url], verify_certs=False)

        return self.es
Example #18
    def run_rule(self, rule):
        """ Run a rule including querying and alerting on results.

        :param rule: The rule configuration.
        :return: The number of matches that the rule produced.
        """

        elastalert_logger.info('Start to run rule: %s', rule.get('name'))
        # Run the rule. If querying over a large time period, split it up into segments
        self.num_hits = 0
        rule_request = rule.get("input").get("search").get("request")
        if rule_request.get("elastic_host",
                            None) is not None and rule_request.get(
                                "elastic_port", None) is not None:
            self.current_es = Elasticsearch(
                host=rule.get("input").get("search").get("request").get(
                    "elastic_host"),
                port=rule.get("input").get("search").get("request").get(
                    "elastic_port"))
        else:
            self.current_es = self.new_elasticsearch(self.global_config)

        self.run_query(rule)

        # Process any new matches
        num_matches = len(rule['type'].matches)

        while rule['type'].matches:
            match = rule['type'].matches.pop(0)

            #if self.is_silenced(rule['name'] + key) or self.is_silenced(rule['name']):
            #    elastalert_logger.info('Ignoring match for silenced rule %s%s' % (rule['name'], key))
            #    continue

            if rule.get('realert'):
                # 'key' normally carries a query_key suffix; this trimmed-down
                # snippet never sets it, so use an empty suffix.
                key = ''
                next_alert, exponent = self.next_alert_time(
                    rule, rule['name'] + key, ts_now())
                self.set_realert(rule['name'] + key, next_alert, exponent)

            # If no aggregation, alert immediately
            #if not rule['aggregation']:
            #    self.alert([match], rule)
            #    continue
            self.alert([match], rule)

            # Add it as an aggregated match
            #self.add_aggregated_alert(match, rule)

        # Mark this endtime for next run's start
        #rule['previous_endtime'] = endtime

        #time_taken = time.time() - run_start

        return num_matches
Example #19
def main(host, port, index, type, chunk_size, geojson_file):

    def _charge_doc():
        for feature in load_geojson(geojson_file):
            yield {
                '_index': index,
                '_type': type,
                '_source': feature
            }

    es = Elasticsearch(host=host, port=port)
    helpers.bulk(es, _charge_doc(), chunk_size=chunk_size, request_timeout=6000)
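# Usage sketch (illustrative values, not from the source):
#   main('localhost', 9200, 'geo_features', 'feature', 500, 'features.geojson')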
Example #20
def elasticsearch_client(conf):
    """ returns an Elasticsearch instance configured using an es_conn_config """
    es_conn_conf = build_es_conn_config(conf)

    return Elasticsearch(host=es_conn_conf['es_host'],
                         port=es_conn_conf['es_port'],
                         url_prefix=es_conn_conf['es_url_prefix'],
                         use_ssl=es_conn_conf['use_ssl'],
                         verify_certs=es_conn_conf['verify_certs'],
                         connection_class=RequestsHttpConnection,
                         timeout=es_conn_conf['es_conn_timeout'],
                         send_get_body_as=es_conn_conf['send_get_body_as'])
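# Context sketch (assumption, not from the source): the keys that
# build_es_conn_config must supply for the factory above. Values are
# illustrative defaults.
EXAMPLE_ES_CONN_CONF = {
    'es_host': 'localhost',
    'es_port': 9200,
    'es_url_prefix': '',
    'use_ssl': False,
    'verify_certs': True,
    'es_conn_timeout': 20,
    'send_get_body_as': 'GET',
}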
Example #21
 def handle_error(self, message, data=None):
     ''' Logs message at error level and writes message, data and traceback to Elasticsearch. '''
     if not self.writeback_es:
         self.writeback_es = Elasticsearch(host=self.es_host,
                                           port=self.es_port)
     logging.error(message)
     body = {'message': message}
     tb = traceback.format_exc()
     body['traceback'] = tb.strip().split('\n')
     if data:
         body['data'] = data
     self.writeback('elastalert_error', body)
Example #22
def main():
    parser = ArgumentParser()
    parser.add_argument('-e',
                        '--elasticsearch-server',
                        default='localhost:9200')
    parser.add_argument('-d', '--dataset')
    parser.add_argument('-s', '--sections')
    opts = parser.parse_args()

    es_hosts = [opts.elasticsearch_server]
    dataset_name = opts.dataset
    dataset_sections = opts.sections

    es = Elasticsearch(hosts=es_hosts, timeout=120)

    if dataset_name == 'newsgroups':
        dataset = NewsgroupsDataset()
    elif dataset_name == 'aviskorpus':
        sections = None
        sources = None

        if dataset_sections:
            try:
                sections, sources = dataset_sections.split('-')
                sections = [int(s) for s in sections.split('|')]
                sources = [s for s in sources.split('|')]
            except Exception:
                logging.error('Malformed section specification "%s" ...' %
                              dataset_sections)
                sys.exit(1)

        dataset = AviskorpusDataset(sections=sections, sources=sources)
    elif dataset_name == 'ndt':
        sections = None
        lang = None

        if dataset_sections:
            try:
                sections, lang = dataset_sections.split('-')
                sections = [int(s) for s in sections.split('|')]
                lang = [s for s in lang.split('|')]
            except Exception:
                logging.error('Malformed section specification "%s" ...' %
                              dataset_sections)
                sys.exit(1)

        dataset = NDTDataset(lang=lang, sections=sections)
    else:
        logging.error('Unknown dataset %s ...' % dataset_name)
        sys.exit(1)

    dataset.install(es)
Example #23
    def filter(self, qs, value):
        client = Elasticsearch([settings.ELASTICSEARCH_HOST])
        value = value.lower()

        search_query = {
            "bool": {
                "must_not": [  # исключает из выдачи is_published=False
                    {
                        "term": {
                            "is_published": False
                        }
                    }
                ],
                "should": [
                    {
                        "simple_query_string": {
                            "fields": ["category_name"],
                            "quote_field_suffix": ".exact",
                            "query": value
                        }
                    },
                ]
            }
        }

        s = Search(using=client, index='category') \
            .query(search_query)\
            .sort("_score", "-views")\
            .extra(size=self.max_result, from_=0)

        hits_list = []
        items = s.execute()
        if items:
            for item in items:
                hits_list.append(item.meta.id)
            hits_order = Case(
                *[When(pk=pk, then=pos) for pos, pk in enumerate(hits_list)])
            qs = qs.filter(id__in=hits_list).order_by(hits_order)
        else:
            qs = qs.none()

            # TODO: fallback?
            # bits = value.split(' ')
            # search_clauses = reduce(operator.and_,
            #                         [Q(title__icontains=v) for v in bits])
            # unpublished = Category.objects.get_queryset_descendants(
            #     Category.objects.filter(is_published=False), include_self=True)
            # qs = (qs
            #       .exclude(pk__in=unpublished)
            #       .filter(search_clauses)
            #       .order_by('-views'))
        return qs[:self.max_result]
Example #24
 def _connect(self):
     """
     connect to a member of the ElasticSearch cluster
     """
     try:
         if self.local_env:
             self.es = Elasticsearch([{'host': self.host,
                                       'port': self.port}])
         else:
             self.es = Elasticsearch([{'host': self.host,
                                       'port': self.port}],
                                     sniff_on_start=True,
                                     sniff_on_connection_fail=True,
                                     sniffer_timeout=self.timeout)
         self.idx = IndicesClient(self.es)
         return
     except ConnectionError as e:
         return ElasticSearchError.no_host_available(self.host, self.port)
     except Exception as e:
         (type_e, value, traceback_prev) = exc_info()
         backtrace = extract_tb(traceback_prev)
         return ElasticSearchError.unknown_exception(backtrace, str(e))
Example #25
def createElasticsearchClient(conf):
    auth = None
    username = os.environ.get('ELASTICSEARCH_USERNAME')
    password = os.environ.get('ELASTICSEARCH_PASSWORD')
    if username and password:
        auth = (username, password)
    return Elasticsearch(
        host=conf['elasticsearch']['host'],
        port=conf['elasticsearch']['port'],
        url_prefix=conf['elasticsearch']['urlPrefix'],
        use_ssl=conf['elasticsearch']['sslEnabled'],
        verify_certs=conf['elasticsearch']['sslStrictEnabled'],
        http_auth=auth,
        timeout=conf['elasticsearch']['timeoutSeconds'])
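# Context sketch (assumption, not from the source): the configuration shape the
# factory above reads, plus the optional basic-auth environment variables.
# Values are illustrative.
EXAMPLE_CONF = {
    'elasticsearch': {
        'host': 'localhost',
        'port': 9200,
        'urlPrefix': '',
        'sslEnabled': False,
        'sslStrictEnabled': False,
        'timeoutSeconds': 30,
    }
}
# export ELASTICSEARCH_USERNAME=...  # optional
# export ELASTICSEARCH_PASSWORD=...  # optional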
Example #26
 def test_repr_truncates_host_to_10(self):
     hosts = [{"host": "es" + str(i)} for i in range(20)]
     self.assertEqual(
         '<Elasticsearch(%r)>' % [{'host': 'es0'}, {'host': 'es1'},
                                  {'host': 'es2'}, {'host': 'es3'},
                                  {'host': 'es4'}, '...'],
         repr(Elasticsearch(hosts)))
Example #27
def create_es_publisher_job(*, elasticsearch, host, neo4j, **kwargs):
    """
    :param elasticsearch_index_alias:  alias for Elasticsearch used in
                                       amundsensearchlibrary/search_service/config.py as an index
    :param elasticsearch_doc_type_key: name the ElasticSearch index is prepended with. Defaults to `table` resulting in
                                       `table_search_index`
    :param model_name:                 the Databuilder model class used in transporting between Extractor and Loader
    :param cypher_query:               Query handed to the `Neo4jSearchDataExtractor` class, if None is given (default)
                                       it uses the `Table` query baked into the Extractor
    :param elasticsearch_mapping:      Elasticsearch field mapping "DDL" handed to the `ElasticsearchPublisher` class,
                                       if None is given (default) it uses the `Table` query baked into the Publisher
    """
    elasticsearch_client = Elasticsearch([{'host': elasticsearch["host"]}])
    # unique name of new index in Elasticsearch
    elasticsearch_new_index_key = 'tables' + str(uuid.uuid4())
    data_path = host["es_data_path"]
    job_config = ConfigFactory.from_dict({
        f'extractor.search_data.extractor.neo4j.{Neo4jExtractor.GRAPH_URL_CONFIG_KEY}':
        neo4j["endpoint"],
        f'extractor.search_data.extractor.neo4j.{Neo4jExtractor.MODEL_CLASS_CONFIG_KEY}':
        'databuilder.models.table_elasticsearch_document.TableESDocument',
        f'extractor.search_data.extractor.neo4j.{Neo4jExtractor.NEO4J_AUTH_USER}':
        neo4j["user"],
        f'extractor.search_data.extractor.neo4j.{Neo4jExtractor.NEO4J_AUTH_PW}':
        neo4j["password"],
        f'loader.filesystem.elasticsearch.{FSElasticsearchJSONLoader.FILE_PATH_CONFIG_KEY}':
        data_path,
        f'loader.filesystem.elasticsearch.{FSElasticsearchJSONLoader.FILE_MODE_CONFIG_KEY}':
        'w',
        f'publisher.elasticsearch.{ElasticsearchPublisher.FILE_PATH_CONFIG_KEY}':
        data_path,
        f'publisher.elasticsearch.{ElasticsearchPublisher.FILE_MODE_CONFIG_KEY}':
        'r',
        f'publisher.elasticsearch.{ElasticsearchPublisher.ELASTICSEARCH_CLIENT_CONFIG_KEY}':
        elasticsearch_client,
        f'publisher.elasticsearch.{ElasticsearchPublisher.ELASTICSEARCH_NEW_INDEX_CONFIG_KEY}':
        elasticsearch_new_index_key,
        f'publisher.elasticsearch.{ElasticsearchPublisher.ELASTICSEARCH_DOC_TYPE_CONFIG_KEY}':
        'table',
        f'publisher.elasticsearch.{ElasticsearchPublisher.ELASTICSEARCH_ALIAS_CONFIG_KEY}':
        'table_search_index',
    })
    task = DefaultTask(loader=FSElasticsearchJSONLoader(),
                       extractor=Neo4jSearchDataExtractor(),
                       transformer=NoopTransformer())
    job = DefaultJob(conf=job_config,
                     task=task,
                     publisher=ElasticsearchPublisher())
    return job
Example #28
def update():

    for doc in scan(Elasticsearch(ES_DESTINATION),
                    query={"query": {
                        "match_all": {}
                    }},
                    index="smartapi_docs",
                    scroll="60m"):
        print(doc["_id"])
        smartapi = SmartAPI.get(doc["_id"])
        print(smartapi.check())
        print(smartapi.refresh())
        if smartapi.webdoc.status == 299:
            smartapi.webdoc._status = 200  # change status not reliable during migration
        smartapi.save()
    print()
Example #29
def autocomplete_elastic_tags(request):
    client = Elasticsearch(hosts=settings.ELASTIC_SEARCH_NODES)
    index_name = elastic_cache_key(request.session['project_id'], 'ec2')

    if not client.indices.exists(index_name):
        try:
            populate_elastic_search(project_id=request.session['project_id'])
        except Exception:
            import traceback
            LOG.error("Cannot build index: %s" % traceback.format_exc())
            raise Exception('Cache values not present')

    return dict(id='ins_id',
                name='ins_name',
                private_ip_address='private_ip_address',
                ip_address='ip_address')
Example #30
    def get_dashboard(self, rule, db_name):
        """ Download dashboard which matches use_kibana_dashboard from elasticsearch. """
        es = Elasticsearch(host=rule['es_host'], port=rule['es_port'])
        if not db_name:
            raise EAException("use_kibana_dashboard undefined")
        query = {'query': {'term': {'_id': db_name}}}
        try:
            res = es.search(index='kibana-int',
                            doc_type='dashboard',
                            body=query,
                            _source_include=['dashboard'])
        except ElasticsearchException as e:
            raise EAException("Error querying for dashboard: %s" % (e))

        if res['hits']['hits']:
            return json.loads(res['hits']['hits'][0]['_source']['dashboard'])
        else:
            raise EAException("Could not find dashboard named %s" % (db_name))