def search():
    q = raw_input()
    conn = pyes.ES(['localhost:9201'])
    conn.default_indices = [INDEX_NAME]
    results = conn.search(query=pyes.query.TermQuery('_all', q))
    for r in results:
        print r
def hostnameToNodeids(self, hostname):
    """Return the list of node ids for the given local hostname."""
    nodes = []
    try:
        # MatchQuery replaces the deprecated TextQuery
        query = pyes.query.MatchQuery("hostname", hostname)
        # Connect to ES
        es = pyes.ES(self._url)
        search = pyes.query.Search(query=query, start=0, size=100, fields=[])
        results = es.search(search, indices="stats", doc_types="stat")
        if results is not None:
            for r in results.hits:
                nodes.append(r._id)
    except Exception, e:
        sys_log.SysLog(self._logFile, self._instance).writeLog(
            "error", str(traceback.format_exc()))
    return nodes
def search():
    es = pyes.ES('127.0.0.1:9200')
    page_no = request.GET.get('page_no')
    query_string = request.GET.get('query')
    query = pyes.StringQuery(query_string)
    result = es.search(query=query, search_fields="_all")
    bill_list = []
    session = models.DBSession()
    for res in result['hits']['hits']:
        id = res['_source']['id']
        bill = session.query(models.BillRevision).get(id)
        if not bill:
            log.info('bill_revs record with %r not found.' % id)
            continue
        bill_list.append(bill)
    if bill_list:
        pages = utils.Pagination(settings.ITEM_PER_PAGE,
                                 settings.PAGE_DISPLAYED,
                                 len(bill_list), page_no)
    else:
        pages = None
    if pages:
        bill_list = bill_list[pages.first:pages.last]
    return {'bill': bill_list, 'query': query_string, 'pages': pages}
def request_data(args):
    query = generate_query(args)
    outputters = build_analysers(args)
    address = args.get("es_server", "localhost:9200")
    print "Connecting to: %s" % address
    conn = pyes.ES(address)
    size = args.get("size", 20)
    if args.get("all", False):
        print "Retrieving count"
        data = conn.count(query)
        size = data.get("count")
    batch = min(args.get("batch", 1000), size)
    splits = range(0, size, batch)
    errors = []
    for s in splits:
        data = retrieve_data(conn, query, s, batch, args)
        e = analyse_data(data, outputters, args)
        errors.extend(e)
    if errors:
        if 'output' in args:
            output_file = args.get('output') + "_errors.json"
            output = open(output_file, 'w')
            output.write("%s\n" % json.dumps(errors))
            output.close()
        else:
            print "Errors:"
            print errors
def upa_neguim(projetos):
    print 'Connecting to ES...'
    conn = pyes.ES('http://127.0.0.1:9200')
    try:
        print 'Creating index...'
        conn.indices.create_index("monitor")
    except:
        # the index already exists; ignore the error
        pass
    mapping = {
        "data_fim": {"type": "date", "format": "dd/MM/YYYY"},
        "data_ini": {"type": "date", "format": "dd/MM/YYYY"}
    }
    print 'Mapping...'
    conn.indices.put_mapping("projeto", {'properties': mapping}, ["monitor"])
    erros = 0
    print 'Indexing!'
    for v in projetos:
        p = projetos[v]
        # index each project once, counting failures
        try:
            conn.index(p, 'monitor', 'projeto', p['id'], bulk=True)
        except:
            print "error"
            erros = erros + 1
    # flush any documents still queued by bulk=True
    conn.force_bulk()
    print erros
def esPruneIndexes():
    if options.output == 'syslog':
        logger.addHandler(SysLogHandler(address=(options.sysloghostname,
                                                 options.syslogport)))
    else:
        sh = logging.StreamHandler(sys.stderr)
        sh.setFormatter(formatter)
        logger.addHandler(sh)
    logger.debug('started')
    try:
        es = pyes.ES((list('{0}'.format(s) for s in options.esservers)))
        indices = es.indices.stats()['indices'].keys()
        # do the pruning
        for (index, dobackup, rotation, pruning) in zip(options.indices,
                                                        options.dobackup,
                                                        options.rotation,
                                                        options.pruning):
            try:
                if pruning != '0':
                    index_to_prune = index
                    if rotation == 'daily':
                        idate = date.strftime(
                            datetime.utcnow() - timedelta(days=int(pruning)),
                            '%Y%m%d')
                        index_to_prune += '-%s' % idate
                    elif rotation == 'monthly':
                        idate = date.strftime(
                            datetime.utcnow() - timedelta(days=31 * int(pruning)),
                            '%Y%m')
                        index_to_prune += '-%s' % idate
                    if index_to_prune in indices:
                        logger.info('Deleting index: %s' % index_to_prune)
                        es.indices.delete_index(index_to_prune)
                    else:
                        logger.error('Error deleting index %s, index missing' %
                                     index_to_prune)
            except Exception as e:
                logger.error("Unhandled exception while deleting %s, terminating: %r" %
                             (index_to_prune, e))
    except Exception as e:
        logger.error("Unhandled exception, terminating: %r" % e)
def pushToES(self):
    ttComplete = self.tFrame
    # Add an id for looping into the Elasticsearch index
    ttComplete["no_index"] = range(1, len(ttComplete) + 1)
    # Convert the DataFrame into JSON
    tmp = ttComplete.reset_index().to_json(orient="records")
    # Load each record into json format before indexing
    tmp_json = json.loads(tmp)
    print("Convert Dataframe into Json view:")
    print("total number of json list is:")
    print(len(tmp_json))
    print(tmp_json[1:3])
    index_name = 'youtube'
    type_name = 'pyelastic'
    es = pyes.ES()
    i = 1
    for doc in tmp_json:
        es.index(doc, index_name, type_name, id=doc["no_index"])
        i = i + 1
    print("Number of doc in the batch")
    print(i - 1)
def get_search(self, request, **kwargs):
    """
    Execute a search query against Elasticsearch.

    Request parameters are:
    - `q`: string query
    - `types`: set of document types (`contact`, `organization`, `invoice`, ...)

    A minimum of 2 chars is required for the query to be processed
    (wildcards excluded).
    """
    import re
    import pyes
    from pyes.query import Search, StringQuery
    self.method_check(request, allowed=['get'])
    self.is_authenticated(request)
    self.throttle_check(request)
    try:
        # Tenant (slug) must be present
        tenant = request.tenant.slug
        # By default, search is made among all types; this can be
        # overridden by passing a types argument
        doc_types = request.GET.getlist('types')
        # The 'q' parameter represents the query
        query = request.GET.get('q')
        # The query must be a string composed of at least 2 chars
        # (ES wildcards excluded)
        assert (isinstance(query, basestring)
                and len(re.sub('[?*]', '', query)) >= 2)
    except:
        return http.HttpBadRequest()
    try:
        conn = pyes.ES(settings.ES_SERVERS, basic_auth=settings.ES_AUTH)
        q = Search(StringQuery(query))
        resultset = conn.search(
            q, indices=tenant,
            doc_types=u",".join(doc_types) if doc_types else None)
        searched_items = []
        for res in resultset:
            res.update({
                'id': res._meta['id'],
                'resource_type': res._meta['type'],
                'score': res._meta['score']
            })
            searched_items.append(res)
    except:
        return http.HttpBadRequest()
    self.log_throttled_access(request)
    paginator = self._meta.paginator_class(
        request.GET, searched_items,
        resource_uri=self.get_resource_uri(),
        limit=self._meta.limit)
    to_be_serialized = paginator.page()
    return self.create_response(request, to_be_serialized,
                                response_class=http.HttpResponse)
def discover():
    es = pyes.ES(server=(list('{0}'.format(s) for s in options.esservers)))
    indicesManager = pyes.managers.Indices(es)
    indices = indicesManager.get_indices()
    config_indices = []
    config_dobackup = []
    config_rotation = []
    config_pruning = []
    for index in indices.keys():
        index_template = index
        freq = 'none'
        pruning = '0'
        if re.search(r'-[0-9]{8}', index):
            freq = 'daily'
            pruning = '20'
            index_template = index[:-9]
        elif re.search(r'-[0-9]{6}', index):
            freq = 'monthly'
            index_template = index[:-7]
        if index_template not in config_indices:
            config_indices.append(index_template)
            config_dobackup.append('1')
            config_rotation.append(freq)
            config_pruning.append(pruning)
    setConfig('backup_indices', ','.join(config_indices), options.configfile)
    setConfig('backup_dobackup', ','.join(config_dobackup), options.configfile)
    setConfig('backup_rotation', ','.join(config_rotation), options.configfile)
    setConfig('backup_pruning', ','.join(config_pruning), options.configfile)
def get_similar_dockets(text, exclude_docket):
    es = pyes.ES(settings.ES_SETTINGS)
    results = es.search_raw({
        'query': {
            'more_like_this': {
                'fields': ['files.text'],
                'like_text': text
            }
        },
        'filter': {
            'and': [
                {'terms': {'document_type': ['rule', 'proposed_rule', 'notice']}},
                {'not': {'term': {'docket_id': exclude_docket}}}
            ]
        },
        'fields': ['docket_id']
    }, indices=ES_INDEX, doc_types=['document'])
    docket_ids = [hit['fields']['docket_id'] for hit in results.hits.hits]
    return uniq(docket_ids)
def Delete_Overdue_Indexes(self, retain, dDate=None, idxType="logs"):
    """Delete indexes that are past their retention period."""
    if retain <= 0:
        return
    if dDate is None:
        dDate = datetime.datetime.now()
    dDate = dDate + datetime.timedelta(days=0 - retain)
    idxname = LOGPREFIX + "-" + dDate.strftime(INDEX_PATTERN)
    idxPattern = LOGPREFIX + "-*"
    if idxType == "flows":
        idxname = FLOWS_LOGPREFIX + "-" + dDate.strftime(FLOWS_INDEX_PATTERN)
        idxPattern = FLOWS_LOGPREFIX + "-*"
    # Connect to ES
    es = pyes.ES(self._url)
    # Get the list of indexes matching the pattern
    lst = es.indices.aliases(idxPattern).keys()
    for idx in lst:
        if idx < idxname:
            es.indices.delete_index(idx)
def esPruneIndexes():
    es = pyes.ES((list('{0}'.format(s) for s in options.esservers)))
    indexes = es.indices.stats()['indices'].keys()
    # print('[*]\tcurrent indexes: {0}'.format(indexes))
    # set index names events-YYYYMMDD, alerts-YYYYMM, etc.
    dtNow = datetime.utcnow()
    targetSuffix = date.strftime(dtNow - timedelta(days=options.days), '%Y%m%d')
    # rotate daily
    eventsIndexName = 'events-{0}'.format(targetSuffix)
    # rotate monthly
    targetSuffix = date.strftime(dtNow - timedelta(days=options.months * 30), '%Y%m')
    alertsIndexName = 'alerts-{0}'.format(targetSuffix)
    correlationsIndexName = 'correlations-{0}'.format(targetSuffix)
    print('[*]\tlooking for old indexes: {0},{1},{2}'.format(
        eventsIndexName, alertsIndexName, correlationsIndexName))
    if eventsIndexName in indexes:
        print('[*]\tdeleting: {0}'.format(eventsIndexName))
        es.indices.delete_index(eventsIndexName)
    if alertsIndexName in indexes:
        print('[*]\tdeleting: {0}'.format(alertsIndexName))
        es.indices.delete_index(alertsIndexName)
    if correlationsIndexName in indexes:
        print('[*]\tdeleting: {0}'.format(correlationsIndexName))
        es.indices.delete_index(correlationsIndexName)
def naslagview(request):
    es = pyes.ES(settings.ELASTIC_SEARCH)
    r = es.search(pyes.MatchAllQuery(), indices='nedind',
                  doc_types=['naslag'], sort="boekcode")
    return direct_to_template(request, "naslag.html", {'naslag': r})
def start(self, cfg):
    self.es_host = cfg.get('database_elasticsearch', 'host')
    self.es_port = cfg.get('database_elasticsearch', 'port')
    self.es_index = cfg.get('database_elasticsearch', 'index')
    self.es_type = cfg.get('database_elasticsearch', 'type')
    self.es_conn = pyes.ES('{0}:{1}'.format(self.es_host, self.es_port))
    self.run(cfg)
def test_getCurrent_givesSumOfAllDocuments(self):
    '''This should take the dictionary reply of pyes.ES.status()
    and pull out a sum of num_docs'''
    # mock pyes.ES.status()
    self.mox.StubOutClassWithMocks(pyes, "ES")
    dummyESconn = pyes.ES(['myeshost:12345'])
    dummyESconn.status().AndReturn({
        "indices": {
            "4534": {"docs": {"num_docs": 500000}},
            "4535": {"docs": {"num_docs": 50000}}
        }
    })
    self.mox.ReplayAll()
    # now to test
    my_es = check_es_insert.Elasticsearcher('myeshost:12345')
    result = my_es.getCurrent()
    self.assertEqual(result, 550000)
def main():
    logger.debug('starting')
    logger.debug(options)
    es = pyes.ES((list('{0}'.format(s) for s in options.esservers)))
    results = esCloudTrailSearch(es)
    createAlerts(es, results)
    logger.debug('finished')
def Get_node_list_day(self, dDate=None):
    """Get the list of collector nodes for one day's flow data."""
    lst = []
    if dDate is None:
        dDate = datetime.datetime.now()
    idxname = FLOWS_LOGPREFIX + "-" + dDate.strftime(FLOWS_INDEX_PATTERN)
    # Connect to ES
    es = pyes.ES(self._url)
    if es.indices.exists_index(idxname):
        # Query everything, with a terms aggregation on the node field
        query = pyes.query.MatchAllQuery()
        search = query.search()
        terms_agg = pyes.aggs.TermsAgg(name="tmpagg", field=FIELD_NODE, size=0)
        search.agg.add(terms_agg)
        result = es.search(search, size=0, indices=idxname)
        lst = result.aggs.tmpagg.buckets
    return lst
def __init__(self, esserver):
    """
    Class used for ES features not supported by pyes and for
    shortcuts when using pyes
    """
    self.esserver = esserver
    self.conn = pyes.ES(esserver)
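# Usage sketch for the wrapper above. The enclosing class name is not shown
# in the original snippet; "ESHelper" is assumed here purely for illustration.
helper = ESHelper('localhost:9200')
# The raw pyes connection stays available for standard calls...
current_indices = helper.conn.indices.stats()['indices'].keys()
# ...while the stored address (helper.esserver) can back requests for ES
# features that pyes itself does not support.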
def pushToES(self):
    ttComplete = self.tFrame
    # Add an id for looping into the Elasticsearch index
    #ttComplete["no_index"] = range(1, len(ttComplete) + 1)
    # Convert the DataFrame into JSON
    tmp = ttComplete.reset_index().to_json(orient="records")
    # Load each record into json format before the bulk load
    tmp_json = json.loads(tmp)
    #print(tmp_json[1:3])
    index_name = 'youtube'
    type_name = 'pyelastic'
    es = pyes.ES('169.53.152.5:9200')
    i = 1
    for doc in tmp_json:
        #print "Document in tmp_json: " + str(i)
        try:
            es.index(doc, index_name, type_name, bulk=True)
            i = i + 1
        except:
            e = sys.exc_info()[0]
            print e
            pass
    es.force_bulk()
    print "Total Number of doc in the batch: " + str(len(tmp_json)) + \
          ". Successful doc: " + str(i - 1)
def domain():
    global start_date
    global end_date
    global eshost
    global esconn
    if len(sys.argv) != 4:
        usage()
    eshost = sys.argv[1]
    try:
        start_date = dateutil.parser.parse(sys.argv[2]).replace(tzinfo=dateutil.tz.tzutc())
        end_date = dateutil.parser.parse(sys.argv[3]).replace(tzinfo=dateutil.tz.tzutc())
    except ValueError as e:
        sys.stderr.write('error parsing date: {}\n'.format(e))
        sys.exit(1)
    try:
        esconn = pyes.ES(eshost)
    except RuntimeError as e:
        sys.stderr.write('error parsing es host argument: {}\n'.format(e))
        sys.exit(1)
    try:
        if not esconn.indices.exists_index(indexname):
            sys.stderr.write('error: index {} not found\n'.format(indexname))
            sys.exit(1)
    except pyes.exceptions.NoServerAvailable:
        sys.stderr.write('error checking for index on specified es host\n')
        sys.exit(1)
    alerts = make_query()
    pdata(alerts)
def index():
    mapping = {
        'document': {
            'type': 'attachment',
            'fields': {
                "title": {"store": "yes"},
                "file": {
                    "term_vector": "with_positions_offsets",
                    "store": "yes"
                }
            }
        },
        'name': {
            'type': 'string',
            'store': 'yes',
            'boost': 1.0,
            'index': 'analyzed'
        },
        'long_name': {
            'type': 'string',
            'store': 'yes',
            'boost': 1.0,
            'index': 'analyzed'
        },
        'status': {
            'type': 'string',
            'store': 'yes',
        },
        'year': {
            'type': 'integer',
            'store': 'yes'
        },
        'read_by': {
            'type': 'string',
            'store': 'yes',
            'index': 'analyzed'
        },
        'date_presented': {
            'type': 'date',
            'store': 'yes'
        },
        'bill_id': {
            'type': 'integer',
            'store': 'yes'
        }
    }
    es = pyes.ES('192.168.0.104:9200')
    es.create_index_if_missing('bill-index')
    es.put_mapping('bill-type', {'bill-type': {'properties': mapping}},
                   ['bill-index'])
    es.refresh('bill-index')
    get_row = get_indexable_bills()
    for i in get_row:
        es.index(i, 'bill-index', 'bill-type')
    es.refresh('bill-index')
def main():
    start = time.time()
    arguments = parser().parse_args()
    if (not arguments.hours_to_keep and not arguments.days_to_keep
            and not arguments.disk_space_to_keep):
        print('Invalid arguments: You must specify either the number of hours'
              ' to keep, the number of days to keep, or the maximum disk space'
              ' to use')
        parser().print_help()
        return
    address = '{0}:{1}'.format(arguments.host, arguments.port)
    connection = pyes.ES(address, timeout=arguments.timeout)
    if arguments.days_to_keep:
        print('Deleting daily indices older than {0} days.'.format(
            arguments.days_to_keep))
        expired_indices = find_expired_indices(connection, arguments.days_to_keep,
                                               arguments.hours_to_keep,
                                               arguments.separator,
                                               arguments.prefix)
    if arguments.hours_to_keep:
        print('Deleting hourly indices older than {0} hours.'.format(
            arguments.hours_to_keep))
        expired_indices = find_expired_indices(connection, arguments.days_to_keep,
                                               arguments.hours_to_keep,
                                               arguments.separator,
                                               arguments.prefix)
    if arguments.disk_space_to_keep:
        print("Let's keep disk usage lower than {0} GB.".format(
            arguments.disk_space_to_keep))
        expired_indices = find_overusage_indices(connection,
                                                 arguments.disk_space_to_keep,
                                                 arguments.separator,
                                                 arguments.prefix)
    print('')
    for index_name, expired_by in expired_indices:
        expiration = timedelta(seconds=expired_by)
        if arguments.dry_run:
            print("Would have attempted deleting index {0} because it is {1} "
                  "older than the calculated cutoff.".format(index_name,
                                                             expiration))
            continue
        print("Deleting index {0} because it was {1} older than cutoff.".format(
            index_name, expiration))
        deletion = connection.delete_index_if_exists(index_name)
        # On success ES returns a dict of the form:
        # {u'acknowledged': True, u'ok': True}
        if deletion.get('ok'):
            print("Successfully deleted index: {0}".format(index_name))
        else:
            print("Error deleting index: {0}. ({1})".format(index_name,
                                                            deletion))
    print('\nDone in {0}.'.format(timedelta(seconds=time.time() - start)))
def delete_index(name):
    try:
        conn = pyes.ES(ES_PATH, timeout=200.0)
        conn.indices.delete_index(name)
        print "Index %s deleted" % name
    except Exception, e:
        print "Failed to delete index %s" % name
        es_logger.error(str(e))
def main():
    logger.debug('starting')
    logger.debug(options)
    es = pyes.ES((list('{0}'.format(s) for s in options.esservers)))
    # see if we have matches
    indicatorCounts = esSearch(es)
    createAlerts(es, indicatorCounts)
    logger.debug('finished')
def getCurrent(self, index=''):
    conn = pyes.ES([self.address])
    status = conn.indices.status()
    for es_index in status['indices'].iterkeys():
        if index == es_index or index == "":
            self.mysum = self.mysum + \
                status['indices'][es_index]['docs']['num_docs']
    return self.mysum
def get_conn():
    host = "127.0.0.1:9200"
    db_name = "fundfind"
    # host = config["ELASTIC_SEARCH_HOST"]
    # db_name = config["ELASTIC_SEARCH_DB"]
    # print host, db_name
    conn = pyes.ES([host])
    return conn, db_name
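# Usage sketch for get_conn() above; the query term is illustrative only.
conn, db_name = get_conn()
results = conn.search(pyes.query.StringQuery("health"), indices=db_name)
for hit in results:
    print hit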
def esRotateIndexes():
    if options.output == 'syslog':
        logger.addHandler(
            SysLogHandler(address=(options.sysloghostname, options.syslogport)))
    else:
        sh = logging.StreamHandler(sys.stderr)
        sh.setFormatter(formatter)
        logger.addHandler(sh)
    logger.debug('started')
    try:
        es = pyes.ES((list('{0}'.format(s) for s in options.esservers)))
        indices = es.indices.stats()['indices'].keys()
        odate_day = date.strftime(datetime.utcnow() - timedelta(days=1), '%Y%m%d')
        odate_month = date.strftime(datetime.utcnow() - timedelta(days=1), '%Y%m')
        ndate_day = date.strftime(datetime.utcnow(), '%Y%m%d')
        ndate_month = date.strftime(datetime.utcnow(), '%Y%m')
        for (index, dobackup, rotation, pruning) in zip(options.indices,
                                                        options.dobackup,
                                                        options.rotation,
                                                        options.pruning):
            try:
                if rotation != 'none':
                    oldindex = index
                    newindex = index
                    if rotation == 'daily':
                        oldindex += '-%s' % odate_day
                        newindex += '-%s' % ndate_day
                    elif rotation == 'monthly':
                        oldindex += '-%s' % odate_month
                        newindex += '-%s' % ndate_month
                        # do not rotate before the month ends
                        if oldindex == newindex:
                            logger.debug(
                                'do not rotate %s index, month has not changed yet' % index)
                            continue
                    logger.debug('Creating %s index' % newindex)
                    es.indices.create_index(newindex)
                    logger.debug('Updating %s alias to new index' % index)
                    es.indices.set_alias(index, newindex)
                    if oldindex in indices:
                        logger.debug(
                            'Updating %s-previous alias to old index' % index)
                        es.indices.set_alias('%s-previous' % index, oldindex)
                    else:
                        logger.debug(
                            'Old index %s is missing, do not change %s-previous alias'
                            % (oldindex, index))
            except Exception as e:
                logger.error(
                    "Unhandled exception while rotating %s, terminating: %r" %
                    (index, e))
    except Exception as e:
        logger.error("Unhandled exception, terminating: %r" % e)
def main():
    logger.debug('starting')
    logger.debug(options)
    es = pyes.ES(server=(list('{0}'.format(s) for s in options.esservers)))
    try:
        auth = HTTPBasicAuth(options.mquser, options.mqpassword)
        for server in options.mqservers:
            logger.debug('checking message queues on {0}'.format(server))
            r = requests.get(
                'http://{0}:{1}/api/queues'.format(server, options.mqapiport),
                auth=auth)
            mq = r.json()
            # set up a log entry for health/status
            healthlog = dict(
                utctimestamp=pytz.timezone('US/Pacific').localize(
                    datetime.now()).isoformat(),
                hostname=server,
                processid=os.getpid(),
                processname=sys.argv[0],
                severity='INFO',
                summary='mozdef health/status',
                category='mozdef',
                tags=[],
                details=[])
            healthlog['details'] = dict(username='******')
            healthlog['details']['loadaverage'] = list(os.getloadavg())
            healthlog['tags'] = ['mozdef', 'status']
            for m in mq:
                if 'message_stats' in m.keys():
                    if 'messages_ready' in m.keys():
                        mready = m['messages_ready']
                    else:
                        mready = 0
                    if 'messages_unacknowledged' in m.keys():
                        munack = m['messages_unacknowledged']
                    else:
                        munack = 0
                    healthlog['details'][m['name']] = dict(
                        messages_ready=mready,
                        messages_unacknowledged=munack)
                    if 'deliver_details' in m['message_stats'].keys():
                        healthlog['details'][m['name']]['deliver_eps'] = \
                            m['message_stats']['deliver_details']['rate']
                    if 'publish_details' in m['message_stats'].keys():
                        healthlog['details'][m['name']]['publish_eps'] = \
                            m['message_stats']['publish_details']['rate']
            # print(json.dumps(healthlog, sort_keys=True, indent=4))
            # post to the Elasticsearch servers directly without going
            # through the message queues
            es.index(
                index='events',
                doc_type='mozdefhealth',
                doc=json.dumps(healthlog),
                bulk=False)
    except Exception as e:
        logger.error("Exception %r when gathering health and status " % e)
def indexed_count(index):
    aip_indexed_file_count = 0
    try:
        conn = pyes.ES(elasticSearchFunctions.getElasticsearchServerHostAndPort())
        count_data = conn.count(indices=index)
        aip_indexed_file_count = count_data.count
    except:
        pass
    return aip_indexed_file_count
def check_server_status():
    try:
        conn = pyes.ES(getElasticsearchServerHostAndPort())
        conn._send_request('GET', '/')
    except:
        return 'Connection error'
    # no errors!
    return 'OK'