Python ElasticSearch Exemples, pyelasticsearch.client.ElasticSearch Python Exemples

Exemple #1

0

Afficher le fichier

 def index_documents(self, name, documents, bulk=True):
     attempts = 0
     destination_index = ""
     is_box = False
     if name.startswith("boxinfo"):
         destination_index = self.boxinfo_write
         is_box = True
     else:
         destination_index = self.runindex_write
     while True:
         attempts += 1
         try:
             if bulk:
                 self.es.bulk_index(destination_index, name, documents)
             else:
                 self.es.index(destination_index, name, documents[0])
             return True
         except ElasticHttpError as ex:
             if attempts <= 1: continue
             self.logger.error(
                 'elasticsearch HTTP error. skipping document ' + name)
             if is_box == True: break
             #self.logger.exception(ex)
             return False
         except (socket.gaierror, ConnectionError, Timeout) as ex:
             if attempts > 100 and self.runMode:
                 raise (ex)
             self.logger.error('elasticsearch connection error. retry.')
             if self.stopping: return False
             time.sleep(0.1)
             ip_url = getURLwithIP(self.es_server_url, self.nsslock)
             self.es = ElasticSearch(ip_url, timeout=20, revival_delay=60)
             if is_box == True: break
     return False

Exemple #2

0

Afficher le fichier

    def updateIndexMaybe(self, index_name, alias_write, alias_read, settings,
                         mapping):
        connectionAttempts = 0
        retry = False
        while True:
            if self.stopping: break
            connectionAttempts += 1
            try:
                if retry or self.ip_url == None:
                    self.ip_url = getURLwithIP(self.es_server_url,
                                               self.nsslock)
                    self.es = ElasticSearch(self.ip_url,
                                            timeout=20,
                                            revival_delay=60)

                #check if runindex alias exists
                if requests.get(self.es_server_url + '/_alias/' +
                                alias_write).status_code == 200:
                    self.logger.info('writing to elastic index ' +
                                     alias_write + ' on ' +
                                     self.es_server_url + ' - ' + self.ip_url)
                    self.createDocMappingsMaybe(alias_write, mapping)
                    break
                else:
                    time.sleep(.5)
                    if (connectionAttempts % 10) == 0:
                        self.logger.error(
                            'unable to access to elasticsearch alias ' +
                            alias_write + ' on ' + self.es_server_url + ' / ' +
                            self.ip_url)
                    continue
            except ElasticHttpError as ex:
                #es error, retry
                self.logger.error(ex)
                if self.runMode and connectionAttempts > 100:
                    self.logger.error(
                        'elastic (BU): exiting after 100 ElasticHttpError reports from '
                        + self.es_server_url)
                    sys.exit(1)
                elif self.runMode == False and connectionAttempts > 10:
                    self.threadEvent.wait(60)
                else:
                    self.threadEvent.wait(1)
                retry = True
                continue

            except (socket.gaierror, ConnectionError, Timeout) as ex:
                #try to reconnect with different IP from DNS load balancing
                if self.runMode and connectionAttempts > 100:
                    self.logger.error(
                        'elastic (BU): exiting after 100 connection attempts to '
                        + self.es_server_url)
                    sys.exit(1)
                elif self.runMode == False and connectionAttempts > 10:
                    self.threadEvent.wait(60)
                else:
                    self.threadEvent.wait(1)
                retry = True
                continue

Exemple #3

0

Afficher le fichier

Fichier : one_timer_index.py Projet : kgprocks/search_as_you_type

    def __init__(self,
                 index_name,
                 settings_path,
                 host="http://127.0.0.1:9200"):

        self.connection = ElasticSearch(host)
        self.index_name = index_name
        self.settings_path = settings_path

        self.create_index()

Exemple #4

0

Afficher le fichier

Fichier : setupES.py Projet : diguida/hltd

def setupES(es_server_url='http://localhost:9200',deleteOld=1,doPrint=False,overrideTests=False, forceReplicas=-1):

    #ip_url=getURLwithIP(es_server_url)
    es = ElasticSearch(es_server_url,timeout=5)

    #get_template
    #es.send_request('GET', ['_template', name],query_params=query_params)

    #list_template
    #res = es.cluster_state(metric='metadata')
    templateList = es.send_request('GET', ['_template'])
    #templateList = res['metadata']['templates']



    TEMPLATES = ["runappliance"]
    for template_name in TEMPLATES:
        if template_name not in templateList:
            printout(template_name+"template not present. It will be created. ",doPrint,False)
            create_template(es,template_name)
        else:
            norm_name = convert(templateList[template_name])
            if deleteOld==0:
                printout(template_name+" already exists. Add 'replace' parameter to update if different, or forceupdate to always  update.",doPrint,False)
            else:
                printout(template_name+" already exists.",doPrint,False)
                loaddoc = load_template(es,template_name)
                if forceReplicas>=0:
                  loaddoc['settings']['index']['number_of_replicas']=forceReplicas
                if loaddoc!=None:
                    mappingSame =  norm_name['mappings']==loaddoc['mappings']
                    #settingSame = norm_name['settings']==loaddoc['settings']
                    settingsSame=True
                    if int(norm_name['settings']['index.number_of_replicas'])!=int(loaddoc['settings']['index']['number_of_replicas']):
                        settingsSame=False
                    if int(norm_name['settings']['index.number_of_shards'])!=int(loaddoc['settings']['index']['number_of_shards']):
                        settingsSame=False
	            #currently analyzer settings are ot checked
                    #if norm_name['settings']['index']['analysis']!=loaddoc['settings']['analysis']:
                    #    settingsSame=False
                    if not (mappingSame and settingsSame) or deleteOld>1:
                        #test is override
			if overrideTests==False:
			    try:
		                if norm_name['settings']['index,test']==True:
                                    printout("Template test setting found, skipping update...",doPrint,True)
                                    return
			    except:pass
                        #delete_template(es,template_name)
			printout("Updating "+template_name+" ES template",doPrint,True)
                        create_template(es,template_name)
		    else:
                      printout('runappliance ES template is up to date',doPrint,True)

Exemple #5

0

Afficher le fichier

Fichier : collate.py Projet : vanbesien/hltd

 def __init__(self, es_server_url):
     self.server = ElasticSearch(es_server_url)
     self.datadict = {
         'prc-out': {
             "lookup": Query('prc-out', 'source'),
             "action": {
                 'definition':
                 Aggregator('drop'),
                 'data':
                 Aggregator({
                     'in': Aggregator('add'),
                     'out': Aggregator('add'),
                     'file': Aggregator('cat')
                 }),
                 'ls':
                 Aggregator('check'),
                 'stream':
                 Aggregator('check'),
                 'source':
                 Aggregator('match')
             }
         },
         'prc-in': {
             "lookup": Query('prc-in', 'dest'),
             "action": {
                 'definition': Aggregator('drop'),
                 'data': Aggregator({
                     'out': Aggregator('add'),
                 }),
                 'ls': Aggregator('check'),
                 'index': Aggregator('cat'),
                 'source': Aggregator('check'),
                 'dest': Aggregator('check'),
                 'process': Aggregator('cat')
             }
         },
         'prc-s-state': {
             "lookup": Query('prc-s-state'),
             "action": {
                 'macro': Aggregator('histoadd'),
                 'mini': Aggregator('histoadd'),
                 'micro': Aggregator('histoadd'),
                 'tp': Aggregator('add'),
                 'lead': Aggregator('avg'),
                 'nfiles': Aggregator('add'),
                 'ls': Aggregator('check'),
                 'process': Aggregator('cat')
             }
         }
     }

Exemple #6

0

Afficher le fichier

Fichier : consume_base.py Projet : vnisor/cotl

	def __init__(self,group_name, topic_name, timeout=60, filename='config.txt'):
		self.logger = logging.getLogger(__name__)
		self.logger.setLevel(logging.INFO)
		handler = logging.FileHandler('../_logs/%s.log'%group_name)
		handler.setLevel(logging.INFO)
		formatter = logging.Formatter('%(asctime)s %(message)s')
		handler.setFormatter(formatter)
		self.logger.addHandler(handler)

		try:
			f = open(filename, 'r')
			self.hbasehost = f.readline().split(' ')[0]
			self.eshost = f.readline().split(' ')[0]
			self.kafkahost = f.readline().split(' ')[0]
			self.hdfshost = f.readline().split(' ')[0]
			self.logger.info('All Hosts Loaded')
		except Exception as e:
			self.logger.warning('file load error, %s'%filename)
			self.logger.warning(str(e))
			raise
			# sys.exit(0)

		self.group_name = group_name
		self.topic_name = topic_name
		self.timeout = timeout

		try:
			self.kafka = KafkaClient(self.kafkahost)
			self.pool = happybase.ConnectionPool(size=6, host=self.hbasehost)
			self.es = ElasticSearch(self.eshost)
		except Exception as e:
			self.logger.warning(str(e))
			raise

Exemple #7

0

Afficher le fichier

Fichier : elasticbu.py Projet : diguida/hltd

 def index_documents(self,name,documents,bulk=True):
     attempts=0
     destination_index = ""
     is_box=False
     if name.startswith("boxinfo") or name=='resource_summary':
       destination_index = self.boxinfo_write
       is_box=True
     else:
       destination_index = self.runindex_write
     while True:
         attempts+=1
         try:
             if bulk:
                 self.es.bulk_index(destination_index,name,documents)
             else:
                 self.es.index(destination_index,name,documents[0])
             return True
         except ElasticHttpError as ex:
             if attempts<=1:continue
             self.logger.error('elasticsearch HTTP error'+str(ex)+'. skipping document '+name)
             if is_box==True:break
             #self.logger.exception(ex)
             return False
         except (socket.gaierror,ConnectionError,Timeout) as ex:
             if attempts>100 and self.runMode:
                 raise(ex)
             self.logger.error('elasticsearch connection error' + str(ex)+'. retry.')
             if self.stopping:return False
             ip_url=getURLwithIP(self.es_server_url,self.nsslock)
             self.es = ElasticSearch(ip_url,timeout=20)
             time.sleep(0.1)
             if is_box==True:break
     return False

Exemple #8

0

Afficher le fichier

Fichier : one_timer_index.py Projet : kgprocks/search_as_you_type

    def __init__(self, index_name, settings_path, host="http://127.0.0.1:9200"):
        
        self.connection = ElasticSearch(host)
        self.index_name = index_name
        self.settings_path = settings_path

        self.create_index()

Exemple #9

0

Afficher le fichier

Fichier : one_timer_index.py Projet : kgprocks/search_as_you_type

class IndexData:
    def __init__(self,
                 index_name,
                 settings_path,
                 host="http://127.0.0.1:9200"):

        self.connection = ElasticSearch(host)
        self.index_name = index_name
        self.settings_path = settings_path

        self.create_index()

    def get_settings(self):

        config_file = file(self.settings_path)
        settings = json.load(config_file)
        return settings

    def create_index(self):

        settings = self.get_settings()
        try:
            self.connection.create_index(self.index_name, settings)
        except pyelasticsearch.exceptions.ElasticHttpError as e:
            self.connection.delete_index(self.index_name)
            self.connection.create_index(self.index_name, settings)

    def index_data(self, data_path, index_type):

        if index_type is None:
            raise "Please enter valid index type"
        objects = []
        count = 0
        with open(data_path) as f:
            for line in f:
                word_split = line.split("\t")
                cin = word_split[0]
                name = word_split[1].strip()
                doc = {'cin': cin, 'name': name}
                objects.append(doc)
                if len(objects) > 1000:
                    response = self.connection.bulk_index(self.index_name,
                                                          index_type,
                                                          objects,
                                                          id_field='cin')
                    objects = []
            self.connection.bulk_index(self.index_name,
                                       index_type,
                                       objects,
                                       id_field='cin')

Exemple #10

0

Afficher le fichier

Fichier : setupES.py Projet : vanbesien/hltd

def main():
    if len(sys.argv) > 3:
        print "Invalid argument number"
        sys.exit(1)
    if len(sys.argv) < 2:
        print "Please provide an elasticsearch server url (e.g. http://localhost:9200)"
        sys.exit(1)

    deleteOld = False
    if len(sys.argv) > 2:
        if "replace" in sys.argv[2]:
            deleteOld = True

    es_server_url = sys.argv[1]
    ip_url = getURLwithIP(es_server_url)
    es = ElasticSearch(es_server_url)

    #get_template
    #es.send_request('GET', ['_template', name],query_params=query_params)

    #list_template
    res = es.cluster_state(filter_routing_table=True,
                           filter_nodes=True,
                           filter_blocks=True)
    templateList = res['metadata']['templates']

    for template_name in TEMPLATES:
        if template_name not in templateList:
            print "{0} template not present. It will be created. ".format(
                template_name)
            create_template(es, template_name)
        else:
            if deleteOld == False:
                print "{0} already exists. Add 'replace' parameter to force update.".format(
                    template_name)
            else:
                print "{0} already exists.".format(template_name)
                delete_template(es, template_name)
                print "Deleted old template and will recreate {0}".format(
                    template_name)
                create_template(es, template_name)

Exemple #11

0

Afficher le fichier

Fichier : one_timer_index.py Projet : kgprocks/search_as_you_type

class IndexData:

    def __init__(self, index_name, settings_path, host="http://127.0.0.1:9200"):
        
        self.connection = ElasticSearch(host)
        self.index_name = index_name
        self.settings_path = settings_path

        self.create_index()

    def get_settings(self):

        config_file = file(self.settings_path)
        settings = json.load(config_file)
        return settings

    def create_index(self):

        settings = self.get_settings()
        try:
            self.connection.create_index(self.index_name, settings)
        except pyelasticsearch.exceptions.ElasticHttpError as e:
            self.connection.delete_index(self.index_name)
            self.connection.create_index(self.index_name, settings)

    def index_data(self, data_path, index_type):

        if index_type is None:
            raise "Please enter valid index type"
        objects = []
        count = 0
        with open(data_path) as f:
            for line in f:
                word_split = line.split("\t")
                cin = word_split[0]
                name = word_split[1].strip()
                doc = {'cin':cin, 'name':name}
                objects.append(doc)
                if len(objects) > 1000:
                    response = self.connection.bulk_index(self.index_name, index_type, objects, id_field='cin')
                    objects = []
            self.connection.bulk_index(self.index_name, index_type, objects, id_field='cin')

Exemple #12

0

Afficher le fichier

Fichier : setupES.py Projet : smorovic/hltd

def main():
    if len(sys.argv) > 3:
        print "Invalid argument number"
        sys.exit(1)
    if len(sys.argv) < 2:
        print "Please provide an elasticsearch server url (e.g. http://localhost:9200)"
        sys.exit(1)

    deleteOld=False
    if len(sys.argv)>2:
        if "replace" in sys.argv[2]:
            deleteOld=True

    es_server_url = sys.argv[1]
    ip_url=getURLwithIP(es_server_url)
    es = ElasticSearch(es_server_url)

    #get_template
    #es.send_request('GET', ['_template', name],query_params=query_params)

    #list_template
    res = es.cluster_state(metric='metadata')
    templateList = res['metadata']['templates']

    for template_name in TEMPLATES:
        if template_name not in templateList: 
            print "{0} template not present. It will be created. ".format(template_name)
            create_template(es,template_name)
        else:
            if deleteOld==False:
                print "{0} already exists. Add 'replace' parameter to force update.".format(template_name)
            else:
                print "{0} already exists.".format(template_name)
                delete_template(es,template_name)
                print "Deleted old template and will recreate {0}".format(template_name)
                create_template(es,template_name)

Exemple #13

0

Afficher le fichier

Fichier : elasticbu.py Projet : diguida/hltd

    def updateIndexMaybe(self,index_name,alias_write,alias_read,settings,mapping):
        connectionAttempts=0
        retry=False
        while True:
            if self.stopping:break
            connectionAttempts+=1
            try:
                if retry or self.ip_url==None:
                    self.ip_url=getURLwithIP(self.es_server_url,self.nsslock)
                    self.es = ElasticSearch(self.ip_url,timeout=20)

                #check if runindex alias exists
                if requests.get(self.ip_url+'/_alias/'+alias_write).status_code == 200: 
                    self.logger.info('writing to elastic index '+alias_write + ' on '+self.es_server_url+' - '+self.ip_url )
                    self.createDocMappingsMaybe(alias_write,mapping)
                    break
                else:
                    time.sleep(.5)
                    if (connectionAttempts%10)==0:
                        self.logger.error('unable to access to elasticsearch alias ' + alias_write + ' on '+self.es_server_url+' / '+self.ip_url)
                    continue
            except ElasticHttpError as ex:
                #es error, retry
                self.logger.error(ex)
                if self.runMode and connectionAttempts>100:
                    self.logger.error('elastic (BU): exiting after 100 ElasticHttpError reports from '+ self.es_server_url)
                    sys.exit(1)
                elif self.runMode==False and connectionAttempts>10:
                    self.threadEvent.wait(60)
                else:
                    self.threadEvent.wait(1)
                retry=True
                continue

            except (socket.gaierror,ConnectionError,Timeout,RequestsConnectionError,RequestsTimeout) as ex:
                #try to reconnect with different IP from DNS load balancing
                if self.runMode and connectionAttempts>100:
                   self.logger.error('elastic (BU): exiting after 100 connection attempts to '+ self.es_server_url)
                   sys.exit(1)
                elif self.runMode==False and connectionAttempts>10:
                   self.threadEvent.wait(60)
                else:
                   self.threadEvent.wait(1)
                retry=True
                continue

Exemple #14

0

Afficher le fichier

 def __init__(self, es_server_url, runstring, indexSuffix, monBufferSize,
              fastUpdateModulo):
     self.logger = logging.getLogger(self.__class__.__name__)
     self.istateBuffer = []
     self.prcinBuffer = {}
     self.prcoutBuffer = {}
     self.fuoutBuffer = {}
     self.es = ElasticSearch(es_server_url, timeout=20)
     self.hostname = os.uname()[1]
     self.hostip = socket.gethostbyname_ex(self.hostname)[2][0]
     #self.number_of_data_nodes = self.es.health()['number_of_data_nodes']
     self.settings = {"index.routing.allocation.require._ip": self.hostip}
     self.indexCreated = False
     self.indexFailures = 0
     self.monBufferSize = monBufferSize
     self.fastUpdateModulo = fastUpdateModulo
     aliasName = runstring + "_" + indexSuffix
     self.indexName = aliasName  # + "_" + self.hostname

Exemple #15

0

Afficher le fichier

Fichier : collate.py Projet : cmsdaq/hltd

 def __init__(self,es_server_url):
     self.server = ElasticSearch(es_server_url)
     self.datadict = {
         'prc-out' : {
             "lookup" : Query('prc-out','source'),
             "action" : {
                 'definition' : Aggregator('drop'),
                 'data': Aggregator({'in': Aggregator('add'),
                                     'out': Aggregator('add'),
                                     'file':Aggregator('cat')
                                     }),
                 'ls' : Aggregator('check'),
                 'stream' : Aggregator('check'),
                 'source' : Aggregator('match')
                 }
             },
         'prc-in' : {
             "lookup" : Query('prc-in','dest'),
             "action" : {
                 'definition' : Aggregator('drop'),
                 'data': Aggregator({
                         'out'    : Aggregator('add'),
                         }),
                 'ls'     : Aggregator('check'),
                 'index'  : Aggregator('cat'),
                 'source' : Aggregator('check'),
                 'dest'   : Aggregator('check'),
                 'process': Aggregator('cat')
                 }
             },
         'prc-s-state' : {
             "lookup" : Query('prc-s-state'),
             "action" : {
                 'macro'  : Aggregator('histoadd'),
                 'mini'   : Aggregator('histoadd'),
                 'micro'  : Aggregator('histoadd'),
                 'tp'     : Aggregator('add'),
                 'lead'   : Aggregator('avg'),
                 'nfiles' : Aggregator('add'),
                 'ls'     : Aggregator('check'),
                 'process': Aggregator('cat')
                 }
             }
         }

Exemple #16

0

Afficher le fichier

Fichier : elasticbu.py Projet : zazasa/hltd

 def index_documents(self,name,documents):
     attempts=0
     while True:
         attempts+=1
         try:
             self.es.bulk_index(self.index_name,name,documents)
             return True
         except ElasticHttpError as ex:
             if attempts<=1:continue
             self.logger.error('elasticsearch HTTP error. skipping document '+name)
             #self.logger.exception(ex)
             return False
         except (ConnectionError,Timeout) as ex:
             if attempts>100 and self.runMode:
                 raise(ex)
             self.logger.error('elasticsearch connection error. retry.')
             if self.stopping:return False
             time.sleep(0.1)
             ip_url=getURLwithIP(self.es_server_url)
             self.es = ElasticSearch(ip_url)
     return False

Exemple #17

0

Afficher le fichier

Fichier : collate.py Projet : diguida/hltd

 def __init__(self, es_server_url):
     self.server = ElasticSearch(es_server_url)
     self.datadict = {
         "prc-out": {
             "lookup": Query("prc-out", "source"),
             "action": {
                 "definition": Aggregator("drop"),
                 "data": Aggregator({"in": Aggregator("add"), "out": Aggregator("add"), "file": Aggregator("cat")}),
                 "ls": Aggregator("check"),
                 "stream": Aggregator("check"),
                 "source": Aggregator("match"),
             },
         },
         "prc-in": {
             "lookup": Query("prc-in", "dest"),
             "action": {
                 "definition": Aggregator("drop"),
                 "data": Aggregator({"out": Aggregator("add")}),
                 "ls": Aggregator("check"),
                 "index": Aggregator("cat"),
                 "source": Aggregator("check"),
                 "dest": Aggregator("check"),
                 "process": Aggregator("cat"),
             },
         },
         "prc-s-state": {
             "lookup": Query("prc-s-state"),
             "action": {
                 "macro": Aggregator("histoadd"),
                 "mini": Aggregator("histoadd"),
                 "micro": Aggregator("histoadd"),
                 "tp": Aggregator("add"),
                 "lead": Aggregator("avg"),
                 "nfiles": Aggregator("add"),
                 "ls": Aggregator("check"),
                 "process": Aggregator("cat"),
             },
         },
     }

Exemple #18

0

Afficher le fichier

    def write_es_geo(self,
                     es_host='http://localhost:9200/',
                     index_name="geos",
                     doc_type='user_geos'):
        # try to connect with ES and delete the index
        es = ElasticSearch('http://localhost:9200/')

        ## uncomment the following code to prompt check
        # print "Will delete all the doc in the [index:type] from ElasticSearch:"
        # print index_name, ":", doc_type
        # confirm = raw_input("Sure?(y/n)")
        # if confirm!="y":
        # 	sys.exit(0)

        try:
            create_es()
        except Exception as e:
            print "Error", e
        else:
            print index_name, ":", doc_type, " deleted!"
        # initializing the documents
        documents = []
        for record in self.userGeos:
            doc = {
                'uid': int(record[0]),
                'location': {
                    'lat': record[1],
                    'lon': record[2]
                }
            }
            documents.append(doc)
        print "Bulk indexing", len(documents), "documents.."
        es.bulk_index(index_name, doc_type, documents, id_field='uid')
        es.refresh(index_name)
        # test usage
        print "results from ES,"
        query = {"from": 0, "size": 2000, 'query': {"match_all": {}}}
        res = es.search(query, index=index_name)
        print len(res['hits']['hits']), "documents found"
        print "sample result"
        print res['hits']['hits'][0]

Exemple #19

0

Afficher le fichier

Fichier : generate_user.py Projet : realmichaelzyy/cotl

	def write_es_geo(self, es_host='http://localhost:9200/', index_name="geos", doc_type='user_geos'):
		# try to connect with ES and delete the index
		es = ElasticSearch('http://localhost:9200/')

		## uncomment the following code to prompt check
		# print "Will delete all the doc in the [index:type] from ElasticSearch:"
		# print index_name, ":", doc_type
		# confirm = raw_input("Sure?(y/n)")
		# if confirm!="y":
		# 	sys.exit(0)


		try:
			create_es()
		except Exception as e:
			print "Error", e
		else:
			print index_name,":", doc_type," deleted!"
		# initializing the documents
		documents = []
		for record in self.userGeos:
			doc = {'uid':int(record[0]), 'location':{'lat':record[1],'lon':record[2]}}
			documents.append(doc)
		print "Bulk indexing", len(documents),"documents.."
		es.bulk_index(index_name, doc_type, documents, id_field='uid')
		es.refresh(index_name)
		# test usage
		print "results from ES,"
		query = {
			"from" : 0, "size" : 2000,
			'query': {
				 "match_all" : { }
			 }
		 }
		res =  es.search(query, index=index_name)
		print len(res['hits']['hits']), "documents found"
		print "sample result"
		print res['hits']['hits'][0]

Exemple #20

0

Afficher le fichier

Fichier : elasticbu.py Projet : cmsdaq/hltd

class elasticBandBU:

    def __init__(self,conf,runnumber,startTime,runMode=True,nsslock=None,box_version=None,update_run_mapping=True,update_box_mapping=True):
        self.logger = logging.getLogger(self.__class__.__name__)
        self.conf=conf
        self.es_server_url=conf.elastic_runindex_url
        self.runindex_write="runindex_"+conf.elastic_runindex_name+"_write"
        self.runindex_read="runindex_"+conf.elastic_runindex_name+"_read"
        self.runindex_name="runindex_"+conf.elastic_runindex_name
        self.boxinfo_write="boxinfo_"+conf.elastic_runindex_name+"_write"
        self.boxinfo_read="boxinfo_"+conf.elastic_runindex_name+"_read"
        self.boxinfo_name="boxinfo_"+conf.elastic_runindex_name
        self.boxdoc_version=box_version
        self.runnumber = str(runnumber)
        self.startTime = startTime
        self.host = os.uname()[1]
        self.stopping=False
        self.threadEvent = threading.Event()
        self.runMode=runMode
        self.boxinfoFUMap = {}
        self.ip_url=None
        self.nsslock=nsslock
        if update_run_mapping:
            self.updateIndexMaybe(self.runindex_name,self.runindex_write,self.runindex_read,mappings.central_es_settings_runindex,mappings.central_runindex_mapping)
        if update_box_mapping:
            self.updateIndexMaybe(self.boxinfo_name,self.boxinfo_write,self.boxinfo_read,mappings.central_es_settings_boxinfo,mappings.central_boxinfo_mapping)
        #silence
        eslib_logger = logging.getLogger('elasticsearch')
        eslib_logger.setLevel(logging.ERROR)

        self.black_list=None
        if self.conf.instance=='main':
            self.hostinst = self.host
        else:
            self.hostinst = self.host+'_'+self.conf.instance

        #this naturally fits with the 'run' document
	retries=10
        if runMode == True:
	  while retries:
	    retries-=1
            try:
              version = None
              arch = None
              hltmenuname = None
              with open(os.path.join(mainDir,'hlt',conf.paramfile_name),'r') as fp:
                fffparams = json.load(fp)
                version = fffparams['CMSSW_VERSION']
                arch = fffparams['SCRAM_ARCH']
                self.logger.info("OK")
              with open(os.path.join(mainDir,'hlt','HltConfig.py'),'r') as fp:
                firstline = fp.readline().strip().strip("\n") #first line
                if firstline.startswith("#"):
                  hltmenuname = firstline.strip("#").strip()
              break
            except Exception as ex:
	      self.logger.info("failed to parse run metadata file "+str(ex)+". retries left "+str(retries))
	      time.sleep(0.2)

        #write run number document
        if runMode == True and self.stopping==False:
            document = {}
            doc_id = self.runnumber
            document['runNumber'] = doc_id
            document['startTime'] = startTime
            document['activeBUs'] = 1
            document['totalBUs'] = 1
            document['rawDataSeenByHLT']=False
            if version: document['CMSSW_version']=version
            if arch: document['CMSSW_arch']=arch
            if hltmenuname and len(hltmenuname): document['HLT_menu']=hltmenuname
            documents = [document]
            ret = self.index_documents('run',documents,doc_id,bulk=False,overwrite=False)
            if isinstance(ret,tuple) and ret[1]==409:
                #run document was already created by another BU. In that case increase atomically active BU counter
                #self.index_documents('run',[{"inline":"ctx._source.activeBUs+=1;ctx._source.totalBUs+=1","lang":"painless"}],doc_id,bulk=False,update_only=True,script=True,retry_on_conflict=300)
                self.index_documents('run',[{"inline":"ctx._source.activeBUs+=1;ctx._source.totalBUs+=1"}],doc_id,bulk=False,update_only=True,script=True,retry_on_conflict=300)


    def updateIndexMaybe(self,index_name,alias_write,alias_read,settings,mapping):
        connectionAttempts=0
        retry=False
        while True:
            if self.stopping:break
            connectionAttempts+=1
            try:
                if retry or self.ip_url==None:
                    self.ip_url=getURLwithIP(self.es_server_url,self.nsslock)
                    self.es = ElasticSearch(self.ip_url,timeout=20)

                #check if index alias exists
                if requests.get(self.ip_url+'/_alias/'+alias_write).status_code == 200:
                    self.logger.info('writing to elastic index '+alias_write + ' on '+self.es_server_url+' - '+self.ip_url )
                    self.createDocMappingsMaybe(alias_write,mapping)
                    break
                else:
                    time.sleep(.5)
                    if (connectionAttempts%10)==0:
                        self.logger.error('unable to access to elasticsearch alias ' + alias_write + ' on '+self.es_server_url+' / '+self.ip_url)
                    continue
            except ElasticHttpError as ex:
                #es error, retry
                self.logger.error(ex)
                if self.runMode and connectionAttempts>100:
                    self.logger.error('elastic (BU): exiting after 100 ElasticHttpError reports from '+ self.es_server_url)
                    sys.exit(1)
                elif self.runMode==False and connectionAttempts>10:
                    self.threadEvent.wait(60)
                else:
                    self.threadEvent.wait(1)
                retry=True
                continue

            except (socket.gaierror,ConnectionError,Timeout,RequestsConnectionError,RequestsTimeout) as ex:
                #try to reconnect with different IP from DNS load balancing
                if self.runMode and connectionAttempts>100:
                    self.logger.error('elastic (BU): exiting after 100 connection attempts to '+ self.es_server_url)
                    sys.exit(1)
                elif self.runMode==False and connectionAttempts>10:
                    self.threadEvent.wait(60)
                else:
                    self.threadEvent.wait(1)
                retry=True
                continue

    def createDocMappingsMaybe(self,index_name,mapping):
        #update in case of new documents added to mapping definition
        for key in mapping:
            doc = {key:mapping[key]}
            res = requests.get(self.ip_url+'/'+index_name+'/'+key+'/_mapping')
            #only update if mapping is empty
            if res.status_code==200:
                if res.content.strip()=='{}':
                    self.logger.info('inserting new mapping for '+str(key))
                    requests.post(self.ip_url+'/'+index_name+'/'+key+'/_mapping',json.dumps(doc))
                else:
                    #still check if number of properties is identical in each type
                    inmapping = json.loads(res.content)
                    for indexname in inmapping:
                        properties = inmapping[indexname]['mappings'][key]['properties']

                        self.logger.info('checking mapping '+ indexname + '/' + key + ' which has '
                            + str(len(mapping[key]['properties'])) + '(index:' + str(len(properties)) + ') entries..')
                        for pdoc in mapping[key]['properties']:
                            if pdoc not in properties:
                                self.logger.info('inserting mapping for ' + str(key) + ' which is missing mapping property ' + str(pdoc))
                                requests.post(self.ip_url+'/'+index_name+'/'+key+'/_mapping',json.dumps(doc))
                                if res.status_code!=200: self.logger.warning('insert mapping reply status code '+str(res.status_code)+': '+res.content)
                                break
            else:
                self.logger.warning('requests error code '+res.status_code+' in mapping request')

    def read_line(self,fullpath):
        with open(fullpath,'r') as fp:
            return fp.readline()

    def elasticize_modulelegend(self,fullpath):

        self.logger.info(os.path.basename(fullpath))
        document = {}
        #document['_parent']= self.runnumber
        doc_id="microstatelegend_"+self.runnumber
        if fullpath.endswith('.jsn'):
            try:
                with open(fullpath,'r') as fp:
                    doc = json.load(fp)
                    document['stateNames'] = doc['names']
                    try:document['reserved'] = doc['reserved']
                    except:document['reserved'] = 33
                    try:document['special'] = doc['special']
                    except:document['special'] = 7
                    nstring = ""
                    cnt = 0
                    outputcnt = 0
                    #fill in also old format for now
                    for sname in doc['names']:
                        nstring+= str(cnt) + "=" + sname + " "
                        cnt+=1
                        if sname.startswith('hltOutput'):outputcnt+=1
                    try:document['output'] = doc['output']
                    except:document['output']=outputcnt
                    #document['names'] = nstring
            except Exception as ex:
                self.logger.warning("can not parse "+fullpath + ' ' + str(ex))
        else:
            #old format
            stub = self.read_line(fullpath)
            docnames= self.read_line(fullpath)
            document['reserved'] = 33
            document['special'] = 7
            outputcnt=0
            for sname in docnames.split():
                if "=hltOutput" in sname: outputcnt+=1
            document['output'] = outputcnt
            document['stateNames']=[]
            nameTokens = docnames.split()
            for nameToken in nameTokens:
                if '=' in nameToken:
                    idx,sn = nameToken.split('=')
                    document["stateNames"].append( sn )

        documents = [document]
        doc_pars = {"parent":str(self.runnumber)}
        return self.index_documents('microstatelegend',documents,doc_id,doc_params=doc_pars,bulk=False)


    def elasticize_pathlegend(self,fullpath):
        self.logger.info(os.path.basename(fullpath))
        document = {}
        #document['_parent']= self.runnumber
        doc_id="pathlegend_"+self.runnumber
        if fullpath.endswith('.jsn'):
            try:
                with open(fullpath,'r') as fp:
                    doc = json.load(fp)
                    document['stateNames'] = doc['names']
                    document['reserved'] = doc['reserved']
                    #put old name format value
                    nstring=""
                    cnt=0
                    for sname in doc['names']:
                        nstring+= str(cnt) + "=" + sname + " "
                        cnt+=1
                    document['names'] = nstring
            except Exception as ex:
                self.logger.warning("can not parse "+fullpath)
        else:
            stub = self.read_line(fullpath)
            document['names']= self.read_line(fullpath)
        documents = [document]
        doc_pars = {"parent":str(self.runnumber)}
        return self.index_documents('pathlegend',documents,doc_id,doc_params=doc_pars,bulk=False)

    def elasticize_inputlegend(self,fullpath):
        self.logger.info(os.path.basename(fullpath))
        document = {}
        doc_id="inputstatelegend_"+self.runnumber
        try:
            with open(fullpath,'r') as fp:
                doc = json.load(fp)
                document['stateNames'] = doc['names']
        except Exception as ex:
            self.logger.warning("can not parse "+fullpath)
        documents = [document]
        doc_pars = {"parent":str(self.runnumber)}
        return self.index_documents('inputstatelegend',documents,doc_id,doc_params=doc_pars,bulk=False)

    def elasticize_stream_label(self,infile):
        #elasticize stream name information
        self.logger.info(infile.filepath)
        document = {}
        #document['_parent']= self.runnumber
        document['stream']=infile.stream[6:]
        doc_id=infile.basename
        doc_pars = {"parent":str(self.runnumber)}
        return self.index_documents('stream_label',[document],doc_id,doc_params=doc_pars,bulk=False)

    def elasticize_runend_time(self,endtime):
        self.logger.info(str(endtime)+" going into buffer")
        doc_id = self.runnumber
        #first update: endtime field
        self.index_documents('run',[{"endTime":endtime}],doc_id,bulk=False,update_only=True)
        #second update:decrease atomically active BU counter
        #self.index_documents('run',[{"inline":"ctx._source.activeBUs-=1","lang":"painless"}],doc_id,bulk=False,update_only=True,script=True,retry_on_conflict=300)
        self.index_documents('run',[{"inline":"ctx._source.activeBUs-=1"}],doc_id,bulk=False,update_only=True,script=True,retry_on_conflict=300)

    def elasticize_resource_summary(self,jsondoc):
        self.logger.debug('injecting resource summary document')
        jsondoc['appliance']=self.host
        self.index_documents('resource_summary',[jsondoc],bulk=False)

    def elasticize_box(self,infile):

        basename = infile.basename
        self.logger.debug(basename)
        current_time = time.time()

        if infile.data=={}:return

        bu_doc=False
        if basename.startswith('bu') or basename.startswith('dvbu'):
            bu_doc=True

        #check box file against blacklist
        if bu_doc or self.black_list==None:
            self.black_list=[]

            try:
                with open(os.path.join(self.conf.watch_directory,'appliance','blacklist'),"r") as fi:
                    try:
                        self.black_list = json.load(fi)
                    except ValueError:
                        #file is being written or corrupted
                        return
            except:
                #blacklist file is not present, do not filter
                pass

        if basename in self.black_list:return

        if bu_doc==False:
            try:
                if self.boxdoc_version!=infile.data['version']:
                    self.logger.info('skipping '+basename+' box file version '+str(infile.data['version'])+' which is different from '+str(self.boxdoc_version))
                    return;
            except:
                self.logger.warning("didn't find version field in box file "+basename)
                return
            try:
                self.boxinfoFUMap[basename] = [infile.data,current_time]
            except Exception as ex:
                self.logger.warning('box info not injected: '+str(ex))
                return
        try:
            document = infile.data
            #unique id for separate instances
            if bu_doc:
                doc_id=self.hostinst
            else:
                doc_id=basename

            document['id']=doc_id
            try:
              document['activeRunList'] = map(int,document['activeRuns'])
            except:
              pass
            try:
              document['activeRuns'] = map(str,document['activeRuns'])
            except:
              pass
            document['appliance']=self.host
            document['instance']=self.conf.instance
            if bu_doc==True:
                document['blacklist']=self.black_list
            #only here
            document['host']=basename
            try:document.pop('version')
            except:pass
            try:document.pop('ip')
            except:pass
            try:document.pop('boot_id')
            except:pass
            self.index_documents('boxinfo',[document],doc_id,bulk=False)
        except Exception as ex:
            self.logger.warning('box info not injected: '+str(ex))
            return

    def elasticize_fubox(self,doc):
        try:
            doc_id = self.host
            doc['host']=doc_id
            self.index_documents('fu-box-status',[doc],doc_id,bulk=False)
        except Exception as ex:
            self.logger.warning('fu box status not injected: '+str(ex))

    def elasticize_eols(self,infile):
        basename = infile.basename
        self.logger.info(basename)
        data = infile.data['data']
        data.insert(0,infile.mtime)
        data.insert(0,infile.ls[2:])

        values = [int(f) if f.isdigit() else str(f) for f in data]
        try:
            keys = ["ls","fm_date","NEvents","NFiles","TotalEvents","NLostEvents","NBytes"]
            document = dict(zip(keys, values))
        except:
            #try without NBytes
            keys = ["ls","fm_date","NEvents","NFiles","TotalEvents","NLostEvents"]
            document = dict(zip(keys, values))

        doc_id = infile.name+"_"+self.host
        document['id'] = doc_id
        #document['_parent']= self.runnumber
        document['appliance']=self.host
        documents = [document]
        doc_pars = {"parent":str(self.runnumber)}
        self.index_documents('eols',documents,doc_id,doc_params=doc_pars,bulk=False)

    def index_documents(self,name,documents,doc_id=None,doc_params=None,bulk=True,overwrite=True,update_only=False,retry_on_conflict=0,script=False):

        if name=='fu-box-status' or name.startswith("boxinfo") or name=='resource_summary':
            destination_index = self.boxinfo_write
            is_box=True
        else:
            destination_index = self.runindex_write
            is_box=False
        attempts=0
        while True:
            attempts+=1
            try:
                if bulk:
                    self.es.bulk_index(destination_index,name,documents)
                else:
                    if doc_id:
                      if update_only:
                        if script:
                          self.es.update(index=destination_index,doc_type=name,id=doc_id,script=documents[0],upsert=False,retry_on_conflict=retry_on_conflict)
                        else:
                          self.es.update(index=destination_index,doc_type=name,id=doc_id,doc=documents[0],upsert=False,retry_on_conflict=retry_on_conflict)
                      else:
                        #overwrite existing can be used with id specified
                        if doc_params:
                          self.es.index(destination_index,name,documents[0],doc_id,parent=doc_params['parent'],overwrite_existing=overwrite)
                        else:
                          self.es.index(destination_index,name,documents[0],doc_id,overwrite_existing=overwrite)
                    else:
                        self.es.index(destination_index,name,documents[0])
                return True

            except ElasticHttpError as ex:
                if name=='run' and ex[0]==409: #create failed because overwrite was forbidden
                    return (False,ex[0])

                if ex[0]==429:
                  if attempts<10 and not is_box:
                    self.logger.warning('elasticsearch HTTP error 429'+str(ex)+'. retrying..')
                    time.sleep(.1)
                    continue
                else:
                  if attempts<=1 and not is_box:continue

                if is_box:
                    self.logger.warning('elasticsearch HTTP error '+str(ex)+'. skipping document '+name)
                else:
                    self.logger.error('elasticsearch HTTP error '+str(ex)+'. skipping document '+name)
                return False
            except (socket.gaierror,ConnectionError,Timeout) as ex:
                if attempts>100 and self.runMode:
                    raise(ex)
                if is_box or attempts<=1:
                    self.logger.warning('elasticsearch connection error' + str(ex)+'. retry.')
                elif (attempts-2)%10==0:
                    self.logger.error('elasticsearch connection error' + str(ex)+'. retry.')
                if self.stopping:return False
                ip_url=getURLwithIP(self.es_server_url,self.nsslock)
                self.es = ElasticSearch(ip_url,timeout=20)
                time.sleep(0.1)
                if is_box==True:#give up on too many box retries as they are indexed again every 5 seconds
                  break
        return False

Exemple #21

0

Afficher le fichier

    plist = x[1][1]
    pdict = {}
    for i in range(len(plist)):
        pdict[i] = json.loads(plist[i][1])
    with POOL.connection() as connection:
        tagview = connection.table('top_tags')
        rowkey = "%016i" % int(x[0]) + hashlib.md5(str(x[1][0])).digest()
        tagview.put(rowkey, {
            "p:tag": str(x[1][0]),
            "p:dump": json.dumps(pdict)
        })


#sample input
# (u"102", ((5, 5), "{"photo": {"timeposted": 1422939564, "description": "pdes", "tags": "ptag1,ptag3", "URL": "purl", "title": "ptitle", "pid": "102", "location": {"latitude": "plat", "longitude": "plon"}}}"))
ES = ElasticSearch("http://localhost:9200")


def saveESDocuments(x):
    print "writing to hbase.., pid,", x[0]
    parsedrawdata = json.loads(x[1][1])
    document = {
        "pid": int(x[0]),
        "likes": x[1][0][0],
        "views": x[1][0][1],
        "location": {
            "lat": parsedrawdata["photo"]["location"]["latitude"],
            "lon": parsedrawdata["photo"]["location"]["longitude"]
        }
    }
    ES.index('photo_geos', 'photos', document, id=document['pid'])

Exemple #22

0

Afficher le fichier

    command = sys.argv[1]
    server_url = sys.argv[2]
    index_name = sys.argv[3]
else:
    print "Parameters: command[create,alias,mapping] server url, index.alias name (target index)"
    print "  COMMANDS:"
    print "    create: create index"
    print "    alias: create index *_read and *_write aliases (optional parameter: target index)"
    print "    create missing document mappings for the index"
    sys.exit(1)

if server_url.startswith('http://') == False:
    server_url = 'http://' + server_url

#connection
es = ElasticSearch(server_url)

#pick mapping
if index_name.startswith('runindex'):
    my_settings = mappings.central_es_settings
    my_mapping = mappings.central_runindex_mapping
if index_name.startswith('boxinfo'):
    my_settings = mappings.central_es_settings,
    my_mapping = mappings.central_boxinfo_mapping
if index_name.startswith('hltdlogs'):
    my_settings = mappings.central_es_settings_hltlogs
    my_mapping = mappings.central_hltdlogs_mapping

#alias convention
alias_write = index_name + "_write"
alias_read = index_name + "_read"

Exemple #23

0

Afficher le fichier

Fichier : elasticbu.py Projet : smorovic/hltd

class elasticBandBU:

    def __init__(self,conf,runnumber,startTime,runMode=True,nsslock=None):
        self.logger = logging.getLogger(self.__class__.__name__)
        self.conf=conf
        self.es_server_url=conf.elastic_runindex_url
        self.runindex_write="runindex_"+conf.elastic_runindex_name+"_write"
        self.runindex_read="runindex_"+conf.elastic_runindex_name+"_read"
        self.runindex_name="runindex_"+conf.elastic_runindex_name
        self.boxinfo_write="boxinfo_"+conf.elastic_runindex_name+"_write"
        self.boxinfo_read="boxinfo_"+conf.elastic_runindex_name+"_read"
        self.boxinfo_name="boxinfo_"+conf.elastic_runindex_name
        self.runnumber = str(runnumber)
        self.startTime = startTime
        self.host = os.uname()[1]
        self.stopping=False
        self.threadEvent = threading.Event()
        self.runMode=runMode
        self.boxinfoFUMap = {}
        self.ip_url=None
        self.nsslock=nsslock
        self.updateIndexMaybe(self.runindex_name,self.runindex_write,self.runindex_read,mappings.central_es_settings,mappings.central_runindex_mapping)
        self.updateIndexMaybe(self.boxinfo_name,self.boxinfo_write,self.boxinfo_read,mappings.central_es_settings,mappings.central_boxinfo_mapping)
        self.black_list=None
        if self.conf.instance=='main':
            self.hostinst = self.host
        else:
            self.hostinst = self.host+'_'+self.conf.instance

        #write run number document
        if runMode == True and self.stopping==False:
            document = {}
            document['runNumber'] = self.runnumber
            document['startTime'] = startTime
            documents = [document]
            self.index_documents('run',documents)
            #except ElasticHttpError as ex:
            #    self.logger.info(ex)
            #    pass


    def updateIndexMaybe(self,index_name,alias_write,alias_read,settings,mapping):
        connectionAttempts=0
        retry=False
        while True:
            if self.stopping:break
            connectionAttempts+=1
            try:
                if retry or self.ip_url==None:
                    self.ip_url=getURLwithIP(self.es_server_url,self.nsslock)
                    self.es = ElasticSearch(self.ip_url,timeout=20)

                #check if runindex alias exists
                if requests.get(self.ip_url+'/_alias/'+alias_write).status_code == 200: 
                    self.logger.info('writing to elastic index '+alias_write + ' on '+self.es_server_url+' - '+self.ip_url )
                    self.createDocMappingsMaybe(alias_write,mapping)
                    break
                else:
                    time.sleep(.5)
                    if (connectionAttempts%10)==0:
                        self.logger.error('unable to access to elasticsearch alias ' + alias_write + ' on '+self.es_server_url+' / '+self.ip_url)
                    continue
            except ElasticHttpError as ex:
                #es error, retry
                self.logger.error(ex)
                if self.runMode and connectionAttempts>100:
                    self.logger.error('elastic (BU): exiting after 100 ElasticHttpError reports from '+ self.es_server_url)
                    sys.exit(1)
                elif self.runMode==False and connectionAttempts>10:
                    self.threadEvent.wait(60)
                else:
                    self.threadEvent.wait(1)
                retry=True
                continue

            except (socket.gaierror,ConnectionError,Timeout,RequestsConnectionError,RequestsTimeout) as ex:
                #try to reconnect with different IP from DNS load balancing
                if self.runMode and connectionAttempts>100:
                   self.logger.error('elastic (BU): exiting after 100 connection attempts to '+ self.es_server_url)
                   sys.exit(1)
                elif self.runMode==False and connectionAttempts>10:
                   self.threadEvent.wait(60)
                else:
                   self.threadEvent.wait(1)
                retry=True
                continue

    def createDocMappingsMaybe(self,index_name,mapping):
        #update in case of new documents added to mapping definition
        for key in mapping:
            doc = {key:mapping[key]}
            res = requests.get(self.ip_url+'/'+index_name+'/'+key+'/_mapping')
            #only update if mapping is empty
            if res.status_code==200:
                if res.content.strip()=='{}':
                    requests.post(self.ip_url+'/'+index_name+'/'+key+'/_mapping',json.dumps(doc))
                else:
                    #still check if number of properties is identical in each type
                    inmapping = json.loads(res.content)
                    for indexname in inmapping:
                        properties = inmapping[indexname]['mappings'][key]['properties']

                        self.logger.info('checking mapping '+ indexname + '/' + key + ' which has '
                            + str(len(mapping[key]['properties'])) + '(index:' + str(len(properties)) + ') entries..')

                        #should be size 1
                        for pdoc in mapping[key]['properties']:
                            if pdoc not in properties:
                                requests.post(self.ip_url+'/'+index_name+'/'+key+'/_mapping',json.dumps(doc))
                                break
            else:
                self.logger.warning('requests error code '+res.status_code+' in mapping request')

    def read_line(self,fullpath):
        with open(fullpath,'r') as fp:
            return fp.readline()
    
    def elasticize_modulelegend(self,fullpath):

        self.logger.info(os.path.basename(fullpath))
        stub = self.read_line(fullpath)
        document = {}
        document['_parent']= self.runnumber
        document['id']= "microstatelegend_"+self.runnumber
        document['names']= self.read_line(fullpath)
        documents = [document]
        return self.index_documents('microstatelegend',documents)


    def elasticize_pathlegend(self,fullpath):

        self.logger.info(os.path.basename(fullpath))
        stub = self.read_line(fullpath)
        document = {}
        document['_parent']= self.runnumber
        document['id']= "pathlegend_"+self.runnumber
        document['names']= self.read_line(fullpath)
        documents = [document]
        return self.index_documents('pathlegend',documents)

    def elasticize_runend_time(self,endtime):

        self.logger.info(str(endtime)+" going into buffer")
        document = {}
        document['runNumber'] = self.runnumber
        document['startTime'] = self.startTime
        document['endTime'] = endtime
        documents = [document]
        self.index_documents('run',documents)

    def elasticize_box(self,infile):

        basename = infile.basename
        self.logger.debug(basename)
        current_time = time.time()

        if infile.data=={}:return

        bu_doc=False
        if basename.startswith('bu') or basename.startswith('dvbu'):
            bu_doc=True

        #check box file against blacklist
        if bu_doc or self.black_list==None:
            self.black_list=[]

            try:
                with open(os.path.join(self.conf.watch_directory,'appliance','blacklist'),"r") as fi:
                    try:
                        self.black_list = json.load(fi)
                    except ValueError:
                        #file is being written or corrupted
                        return
            except:
                #blacklist file is not present, do not filter
                pass

        if basename in self.black_list:return

        if bu_doc==False:
            try:
                self.boxinfoFUMap[basename] = [infile.data,current_time]
            except Exception as ex:
                self.logger.warning('box info not injected: '+str(ex))
                return
        try:
            document = infile.data
            #unique id for separate instances
            if bu_doc:
                document['id']=self.hostinst
            else:
                document['id']=basename

            #both here and in "boxinfo_appliance"
            document['appliance']=self.host
            document['instance']=self.conf.instance
            #only here
            document['host']=basename
            try:
                document['detectedStaleHandle']=bool(document['detectedStaleHandle']=='True')
            except:
                pass
            self.index_documents('boxinfo',[document])
        except Exception as ex:
            self.logger.warning('box info not injected: '+str(ex))
            return
        if bu_doc:
            try:
                document = infile.data
                try:
                    document.pop('id')
                except:pass
                try:
                    document.pop('host')
                except:pass
                #aggregation from FUs
                document['idles']=0
                document['used']=0
                document['broken']=0
                document['quarantined']=0
                document['cloud']=0
                document['usedDataDir']=0
                document['totalDataDir']=0
                document['hosts']=[basename]
                document['blacklistedHosts']=[]
                for key in self.boxinfoFUMap:
                    dpair = self.boxinfoFUMap[key]
                    d = dpair[0]
                    #check if entry is not older than 10 seconds
                    if current_time - dpair[1] > 10:continue
                    document['idles']+=int(d['idles'])
                    document['used']+=int(d['used'])
                    document['broken']+=int(d['broken'])
                    document['quarantined']+=int(d['quarantined'])
                    document['cloud']+=int(d['cloud'])
                    document['usedDataDir']+=int(d['usedDataDir'])
                    document['totalDataDir']+=int(d['totalDataDir'])
                    document['hosts'].append(key)
                for blacklistedHost in self.black_list:
                    document['blacklistedHosts'].append(blacklistedHost)
                self.index_documents('boxinfo_appliance',[document],bulk=False)
            except Exception as ex:
                #in case of malformed box info
                self.logger.warning('box info not injected: '+str(ex))
                return

    def elasticize_eols(self,infile):
        basename = infile.basename
        self.logger.info(basename)
        data = infile.data['data']
        data.insert(0,infile.mtime)
        data.insert(0,infile.ls[2:])
        
        values = [int(f) if f.isdigit() else str(f) for f in data]
        keys = ["ls","fm_date","NEvents","NFiles","TotalEvents","NLostEvents"]
        document = dict(zip(keys, values))

        document['id'] = infile.name+"_"+os.uname()[1]
        document['_parent']= self.runnumber
        documents = [document]
        self.index_documents('eols',documents)

    def index_documents(self,name,documents,bulk=True):
        attempts=0
        destination_index = ""
        is_box=False
        if name.startswith("boxinfo"):
          destination_index = self.boxinfo_write
          is_box=True
        else:
          destination_index = self.runindex_write
        while True:
            attempts+=1
            try:
                if bulk:
                    self.es.bulk_index(destination_index,name,documents)
                else:
                    self.es.index(destination_index,name,documents[0])
                return True
            except ElasticHttpError as ex:
                if attempts<=1:continue
                self.logger.error('elasticsearch HTTP error. skipping document '+name)
                if is_box==True:break
                #self.logger.exception(ex)
                return False
            except (socket.gaierror,ConnectionError,Timeout) as ex:
                if attempts>100 and self.runMode:
                    raise(ex)
                self.logger.error('elasticsearch connection error' + str(ex)+'. retry.')
                if self.stopping:return False
                ip_url=getURLwithIP(self.es_server_url,self.nsslock)
                self.es = ElasticSearch(ip_url,timeout=20)
                time.sleep(0.1)
                if is_box==True:break
        return False

Exemple #24

0

Afficher le fichier

Fichier : collate.py Projet : cmsdaq/hltd

class Collation:
    def __init__(self,es_server_url):
        self.server = ElasticSearch(es_server_url)
        self.datadict = {
            'prc-out' : {
                "lookup" : Query('prc-out','source'),
                "action" : {
                    'definition' : Aggregator('drop'),
                    'data': Aggregator({'in': Aggregator('add'),
                                        'out': Aggregator('add'),
                                        'file':Aggregator('cat')
                                        }),
                    'ls' : Aggregator('check'),
                    'stream' : Aggregator('check'),
                    'source' : Aggregator('match')
                    }
                },
            'prc-in' : {
                "lookup" : Query('prc-in','dest'),
                "action" : {
                    'definition' : Aggregator('drop'),
                    'data': Aggregator({
                            'out'    : Aggregator('add'),
                            }),
                    'ls'     : Aggregator('check'),
                    'index'  : Aggregator('cat'),
                    'source' : Aggregator('check'),
                    'dest'   : Aggregator('check'),
                    'process': Aggregator('cat')
                    }
                },
            'prc-s-state' : {
                "lookup" : Query('prc-s-state'),
                "action" : {
                    'macro'  : Aggregator('histoadd'),
                    'mini'   : Aggregator('histoadd'),
                    'micro'  : Aggregator('histoadd'),
                    'tp'     : Aggregator('add'),
                    'lead'   : Aggregator('avg'),
                    'nfiles' : Aggregator('add'),
                    'ls'     : Aggregator('check'),
                    'process': Aggregator('cat')
                    }
                }
            }
    def lookup(self,doctype):
        return self.datadict[doctype]['lookup']
    def action(self,doctype):
        return self.datadict[doctype]['action']

#print datadict[type]['lookup']
    def search(self,ind,doctype,ls,stream=None):
        if stream:
            result=self.server.search(self.lookup(doctype)(ls,stream),
                             index=ind)
        else:
            result=self.server.search(self.lookup(doctype)(ls),
                             index=ind)
        return result

    def collate(self,ind,doctype,ls,stream=None):
        result = self.search(ind,doctype,ls,stream)
        for element in  result['hits']['hits']:
            for k,v in element['_source'].items():
                self.action(doctype)[k](v)
        retval = dict((k,v.value()) for k,v in self.action(doctype).items())
        for v in self.action(doctype).values(): v.reset()
        return retval

    def refresh(self,ind):
        self.server.refresh(ind)

    def stash(self,ind,doctype,doc):
        result=self.server.index(ind,doctype,doc)
        return result

Exemple #25

0

Afficher le fichier

Fichier : elasticbu.py Projet : zazasa/hltd

    def __init__(self,es_server_url,runnumber,startTime,runMode=True):
        self.logger = logging.getLogger(self.__class__.__name__)
        self.es_server_url=es_server_url
        self.index_name=conf.elastic_runindex_name
        self.runnumber = str(runnumber)
        self.startTime = startTime
        self.host = os.uname()[1]
        self.stopping=False
        self.threadEvent = threading.Event()
        self.runMode=runMode
        self.settings = {
            "analysis":{
                "analyzer": {
                    "prefix-test-analyzer": {
                        "type": "custom",
                        "tokenizer": "prefix-test-tokenizer"
                    }
                },
                "tokenizer": {
                    "prefix-test-tokenizer": {
                        "type": "path_hierarchy",
                        "delimiter": " "
                    }
                }
             },
            "index":{
                'number_of_shards' : 10,
                'number_of_replicas' : 3
            },
        }

        self.run_mapping = {
            'run' : {
#                '_routing' :{
#                    'required' : True,
#                    'path'     : 'runNumber'
#                },
                '_id' : {
                    'path' : 'runNumber'
                },
                'properties' : {
                    'runNumber':{
                        'type':'integer'
                        },
                    'startTimeRC':{
                        'type':'date'
                            },
                    'stopTimeRC':{
                        'type':'date'
                            },
                    'startTime':{
                        'type':'date'
                            },
                    'endTime':{
                        'type':'date'
                            },
                    'completedTime' : {
                        'type':'date'
                            }
                },
                '_timestamp' : {
                    'enabled' : True,
                    'store'   : 'yes'
                    }
            },
            'microstatelegend' : {

                '_id' : {
                    'path' : 'id'
                },
                '_parent':{'type':'run'},
                'properties' : {
                    'names':{
                        'type':'string'
                        },
                    'id':{
                        'type':'string'
                        }
                    }
            },
            'pathlegend' : {

                '_id' : {
                    'path' : 'id'
                },
                '_parent':{'type':'run'},
                'properties' : {
                    'names':{
                        'type':'string'
                        },
                    'id':{
                        'type':'string'
                        }

                    }
                },
            'boxinfo' : {
                '_id'        :{'path':'id'},#TODO:remove
                'properties' : {
                    'fm_date'       :{'type':'date'},
                    'id'            :{'type':'string'},
                    'broken'        :{'type':'integer'},
                    'used'          :{'type':'integer'},
                    'idles'         :{'type':'integer'},
                    'quarantined'   :{'type':'integer'},
                    'usedDataDir'   :{'type':'integer'},
                    'totalDataDir'  :{'type':'integer'},
                    'usedRamdisk'   :{'type':'integer'},
                    'totalRamdisk'  :{'type':'integer'},
                    'usedOutput'    :{'type':'integer'},
                    'totalOutput'   :{'type':'integer'},
                    'activeRuns'    :{'type':'string'}
                    },
                '_timestamp' : { 
                    'enabled'   : True,
                    'store'     : "yes",
                    "path"      : "fm_date"
                    },
                '_ttl'       : { 'enabled' : True,
                              'default' :  '30d'
                    }
                },

            'boxinfo_last' : {
                '_id'        :{'path':'id'},
                'properties' : {
                    'fm_date'       :{'type':'date'},
                    'id'            :{'type':'string'},
                    'broken'        :{'type':'integer'},
                    'used'          :{'type':'integer'},
                    'idles'         :{'type':'integer'},
                    'quarantined'   :{'type':'integer'},
                    'usedDataDir'   :{'type':'integer'},
                    'totalDataDir'  :{'type':'integer'},
                    'usedRamdisk'   :{'type':'integer'},
                    'totalRamdisk'  :{'type':'integer'},
                    'usedOutput'    :{'type':'integer'},
                    'totalOutput'   :{'type':'integer'},
                    'activeRuns'    :{'type':'string'}
                    },
                '_timestamp' : { 
                    'enabled'   : True,
                    'store'     : "yes",
                    "path"      : "fm_date"
                    }
                },

            'eols' : {
                '_id'        :{'path':'id'},
                '_parent'    :{'type':'run'},
                'properties' : {
                    'fm_date'       :{'type':'date'},
                    'id'            :{'type':'string'},
                    'ls'            :{'type':'integer'},
                    'NEvents'       :{'type':'integer'},
                    'NFiles'        :{'type':'integer'},
                    'TotalEvents'   :{'type':'integer'}
                    },
                '_timestamp' : { 
                    'enabled'   : True,
                    'store'     : "yes",
                    "path"      : "fm_date"
                    },
                },
            'minimerge' : {
                '_id'        :{'path':'id'},
                '_parent'    :{'type':'run'},
                'properties' : {
                    'fm_date'       :{'type':'date'},
                    'id'            :{'type':'string'}, #run+appliance+stream+ls
                    'appliance'     :{'type':'string'},
                    'stream'        :{'type':'string','index' : 'not_analyzed'},
                    'ls'            :{'type':'integer'},
                    'processed'     :{'type':'integer'},
                    'accepted'      :{'type':'integer'},
                    'errorEvents'   :{'type':'integer'},
                    'size'          :{'type':'integer'},
                    }
                }
            }


        connectionAttempts=0
        while True:
            if self.stopping:break
            connectionAttempts+=1
            try:
                self.logger.info('writing to elastic index '+self.index_name)
                ip_url=getURLwithIP(es_server_url)
                self.es = ElasticSearch(es_server_url)
                self.es.create_index(self.index_name, settings={ 'settings': self.settings, 'mappings': self.run_mapping })
                break
            except ElasticHttpError as ex:
                #this is normally fine as the index gets created somewhere across the cluster
                if "IndexAlreadyExistsException" in str(ex):
                    self.logger.info(ex)
                    break
                else:
                    self.logger.error(ex)
                    if runMode and connectionAttempts>100:
                        self.logger.error('elastic (BU): exiting after 100 ElasticHttpError reports from '+ es_server_url)
                        sys.exit(1)
                    elif runMode==False and connectionAttempts>10:
                        self.threadEvent.wait(60)
                    else:
                        self.threadEvent.wait(1)
                    continue

            except (ConnectionError,Timeout) as ex:
                #try to reconnect with different IP from DNS load balancing
                if runMode and connectionAttempts>100:
                   self.logger.error('elastic (BU): exiting after 100 connection attempts to '+ es_server_url)
                   sys.exit(1)
                elif runMode==False and connectionAttempts>10:
                   self.threadEvent.wait(60)
                else:
                   self.threadEvent.wait(1)
                continue
            
        #write run number document
        if runMode == True:
            document = {}
            document['runNumber'] = self.runnumber
            document['startTime'] = startTime
            documents = [document]
            self.index_documents('run',documents)

Exemple #26

0

Afficher le fichier

Fichier : es_test.py Projet : patrickzheng/cotl

from pyelasticsearch.client import ElasticSearch
import sys

# by default we connect to localhost:9200


if __name__ == "__main__":
    if len(sys.argv) != 4:
        print "Usage: [*.py] [lat] [lon] [R]"
        sys.exit(0)
    es = ElasticSearch("http://localhost:9200/")
    lat = float(sys.argv[1])
    lon = float(sys.argv[2])
    r = float(sys.argv[3])
    print lat, lon, r
    query = {
        "from": 0,
        "size": 10,
        "query": {"match_all": {}},
        "filter": {"geo_distance": {"distance": str(r) + "km", "location": {"lat": lat, "lon": lon}}},
        "sort": [{"_geo_distance": {"location": {"lat": lat, "lon": lon}, "order": "asc", "unit": "km"}}],
    }
    query1 = {"from": 0, "size": 10, "query": {"match_all": {}}, "sort": [{"likes": {"order": "desc"}}, "_score"]}
    query_count = {"facets": {"count_by_type": {"terms": {"field": "_type"}}}}
    # res =  es.search(query, index='photo_geos',doc_type=['photos'])
    res = es.search(query_count, index="geos", doc_type=["user_geos"])
    print res
    sys.exit(0)

    uids = [
        (

Exemple #27

0

Afficher le fichier

Fichier : collate.py Projet : vanbesien/hltd

class Collation:
    def __init__(self, es_server_url):
        self.server = ElasticSearch(es_server_url)
        self.datadict = {
            'prc-out': {
                "lookup": Query('prc-out', 'source'),
                "action": {
                    'definition':
                    Aggregator('drop'),
                    'data':
                    Aggregator({
                        'in': Aggregator('add'),
                        'out': Aggregator('add'),
                        'file': Aggregator('cat')
                    }),
                    'ls':
                    Aggregator('check'),
                    'stream':
                    Aggregator('check'),
                    'source':
                    Aggregator('match')
                }
            },
            'prc-in': {
                "lookup": Query('prc-in', 'dest'),
                "action": {
                    'definition': Aggregator('drop'),
                    'data': Aggregator({
                        'out': Aggregator('add'),
                    }),
                    'ls': Aggregator('check'),
                    'index': Aggregator('cat'),
                    'source': Aggregator('check'),
                    'dest': Aggregator('check'),
                    'process': Aggregator('cat')
                }
            },
            'prc-s-state': {
                "lookup": Query('prc-s-state'),
                "action": {
                    'macro': Aggregator('histoadd'),
                    'mini': Aggregator('histoadd'),
                    'micro': Aggregator('histoadd'),
                    'tp': Aggregator('add'),
                    'lead': Aggregator('avg'),
                    'nfiles': Aggregator('add'),
                    'ls': Aggregator('check'),
                    'process': Aggregator('cat')
                }
            }
        }

    def lookup(self, doctype):
        return self.datadict[doctype]['lookup']

    def action(self, doctype):
        return self.datadict[doctype]['action']

#print datadict[type]['lookup']

    def search(self, ind, doctype, ls, stream=None):
        if stream:
            result = self.server.search(self.lookup(doctype)(ls, stream),
                                        index=ind)
        else:
            result = self.server.search(self.lookup(doctype)(ls), index=ind)
        return result

    def collate(self, ind, doctype, ls, stream=None):
        result = self.search(ind, doctype, ls, stream)
        for element in result['hits']['hits']:
            for k, v in element['_source'].items():
                self.action(doctype)[k](v)
        retval = dict((k, v.value()) for k, v in self.action(doctype).items())
        for v in self.action(doctype).values():
            v.reset()
        return retval

    def refresh(self, ind):
        self.server.refresh(ind)

    def stash(self, ind, doctype, doc):
        result = self.server.index(ind, doctype, doc)
        return result

Exemple #28

0

Afficher le fichier

Fichier : query_builder.py Projet : kgprocks/search_as_you_type

from pyelasticsearch.client import ElasticSearch
import pyelasticsearch
import sys
import os

SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
sys.path.append(os.path.join(SCRIPT_DIR + '/..'))

from config import settings

connection = ElasticSearch(settings.ES_HOST)


class QueryBuilder:
    def __init__(self, query, size):

        self.size = size
        self.should_query = []
        self.searchQuery = query
        self.edgeword, self.keyword = self.processText()

    def processText(self):

        text_split = self.searchQuery.split(" ")
        if len(text_split) == 1:
            return self.searchQuery, None

        edgeword = text_split.pop()
        keyword = " ".join(text_split)
        return edgeword, keyword

Exemple #29

0

Afficher le fichier

Fichier : elasticsearch.py Projet : theorm/puerh

class Query(object):
    def __init__(self, url='http://localhost:9200/', index='events'):
        self._es = ElasticSearch(url)
        self._index = index


    def last_request_took(self):
        ''' A number of processing time in milliseconds
        reported by ElasticSearch without our client code overhead.
        '''
        return self._last_request_took

    def _build_filter(self, event_type, start=None, end=None, source=None, **kwargs):
        '''Build an 'AND' filter that combines filters:
            1. correct `event_type`
            2. more or equal than start time (if provided)
            3. less or equal than end time (if provided)
            4. filter by values of terms in kwargs
        '''


        filters = []

        # 0. event type
        filters.append({
            'term': {'_type': event_type}
        })

        if source is not None:
            filters.append({
                'term': {'source': source}
            })

        for term_name, term_values in kwargs.iteritems():
            if term_values:
                terms = {
                    'venue': term_values,
                    'execution': 'or'
                }
                filters.append({
                    # XXX see if this query speeds up things
                    # UPD: not really
                    # if mapping uses analyzed strings

                    # 'query': {
                    #     'query_string': {
                    #         'query': ' OR '.join(term_values)
                    #     }
                    # }
                    'terms': terms,
                })


        timestamp_range = {}
        if start:
            timestamp_range['gte'] = start
        if end:
            timestamp_range['lte'] = end

        # 1. if timestamp range is provided - add it
        if timestamp_range:
            filters.append({
                'range': {'timestamp': timestamp_range}
            })

        # BOOL filter is more performand than AND:
        # http://www.elasticsearch.org/blog/all-about-elasticsearch-filter-bitsets/
        # return {'and': filters}
        return {
            'bool': {
                'must': filters
            }
        }


    def total(self, event_type, start=None, end=None, venues=[], posters=[]):
        '''Returns event's sum of deltas broken down per source:
            {
                'IG': 25.0,
                'FB': 3.0,
                ...
            }

            Can be filtered by start and end dates, venues or posters.
        '''

        filters = self._build_filter(event_type, start=start, end=end, 
            venues=venues, posters=posters)

        query = {
            'facets': {
                'events_deltas_totals': {
                    'terms_stats': {
                        'key_field': 'source',
                        'value_field': 'delta'
                    },
                    'facet_filter': filters
                }
            }
        }


        result = self._es._search_or_count(
            '_search',
            query, 
            index=self._index, 
            query_params={'search_type': 'count'}
        )
        self._last_request_took = result['took']
        facets = result['facets']['events_deltas_totals']['terms']
        return {
            f['term']: f['total']
            for f in facets
        }

    def top_terms(self, event_type, term, limit=10, start=None, end=None, venues=[], posters=[]):
        '''Returns `limit` top terms with their count.
        `term` can be one of: `poster`, `venue`, `source`.

        This is a more flexible version of top posters.

        The rest of arguments do the same as in `total` function.
        '''

        assert term in ('poster', 'venue', 'source')

        filters = self._build_filter(event_type, start=start, 
            end=end, venues=venues, posters=posters)

        query = {
            'facets': {
                'top': {
                    'terms': {
                        'field': term,
                        'size': limit
                    },
                    'facet_filter': filters
                }
            }
        }

        result = self._es._search_or_count(
            '_search',
            query, 
            index=self._index, 
            query_params={'search_type': 'count'}
        )
        self._last_request_took = result['took']
        facets = result['facets']['top']['terms']
        return facets


    def _format_histogram_facet_values(self, values):
        return [
            {'time': datetime.utcfromtimestamp(v['time']/1000), 'total': v['total']}
            for v in values['entries']
        ]

    def histogram(self, event_type, interval, start=None, end=None, 
            venues=[], posters=[], sources_facets=PREFIXES, include_total=False):
        '''Returns histogram of events deltas totals in buckets by `interval` apart.

        {
            'total': [
                {'time': <datetime-1>, 'total': 3.0},
                {'time': <datetime-2>, 'total': 1.0},
            ],
            'FB': [
                ...
            ]
        }

        Filter parameters are the same as in `total` method.

        Source facets are taken from `sources_facets` param. If you don't want them,
        just pass an empty list.

        Total facet is included by default. If you don't need it, set 
        `include_total` to `False`.
        '''
        
        filter_builder = partial(self._build_filter, event_type, start=start, 
            end=end, venues=venues, posters=posters)


        date_histogram_value = {
            'key_field': 'timestamp',
            'value_field': 'delta',
            'interval': interval,
        }

        facets = {}

        if include_total:
            filters = filter_builder(source=None)
            facets['total'] = {
                'date_histogram': date_histogram_value,
                'facet_filter': filters
            }


        for source in sources_facets:
            filters = filter_builder(source=source)

            payload = {
                'date_histogram': date_histogram_value,
                'facet_filter': filters
            }
            facets[source] = payload

        result = self._es._search_or_count(
            '_search',
            query={'facets': facets}, 
            index=self._index, 
            query_params={'search_type': 'count'}
        )
        self._last_request_took = result['took']

        return {
            facet: self._format_histogram_facet_values(values) 
            for facet, values in result['facets'].iteritems()
        }

Exemple #30

0

Afficher le fichier

Fichier : elasticsearch.py Projet : theorm/puerh

class Indexer(object):
    def __init__(self, url='http://localhost:9200/', index='events'):
        self._es = ElasticSearch(url)
        self._es.json_encoder = ESJSONEncoder
        self._index = index


    def cleanup(self):
        try:
            self._es.delete_index(self._index)
        except ElasticHttpNotFoundError:
            pass
        self._es.create_index(self._index, settings={
            'index': {
                'mapper': {
                    'dynamic': False
                }
            }
        })

        not_analyzed_mapping = {
            'properties': {
                'timestamp': {'type': 'date', 'format': 'dateOptionalTime'},
                'source': {'type': 'string', 'index': 'not_analyzed'},
                'venue': {'type': 'string', 'index': 'not_analyzed'},
                'poster': {'type': 'string', 'index': 'not_analyzed'},
                'delta': {'type': 'integer'}
            }
        }

        analyzed_mapping = {
            'properties': {
                'timestamp': {'type': 'date', 'format': 'dateOptionalTime'},
                'source': {'type': 'string', 'analyzer': 'keyword'},
                'venue': {'type': 'string', 'analyzer': 'keyword'},
                'poster': {'type': 'string', 'analyzer': 'keyword'},
                'delta': {'type': 'integer'}
            }
        }

        hybrid_mapping = {
            'properties': {
                'timestamp': {'type': 'date', 'format': 'dateOptionalTime'},
                'source': {'type': 'string', 'analyzer': 'keyword'},
                'venue': {'type': 'string', 'analyzer': 'whitespace'},
                'poster': {'type': 'string', 'analyzer': 'whitespace'},
                'delta': {'type': 'integer'}
            }
        }


        mapping = not_analyzed_mapping

        self._es.put_mapping(self._index, 'post', {'post': mapping})

    def add(self, event):

        data = {
            'timestamp': event['timestamp'],
            'source': event['_id']['source'],
            'venue': '{}-{}'.format(event['_id']['source'], event['venue']),
            'poster': '{}-{}'.format(event['_id']['source'], event['poster']),
            'delta': event.get('delta', 1)
        }

        self._es.index(
            self._index,
            event.get('type').lower(),
            data,
            id='{source}-{id}'.format(**event['_id'])
        )

Exemple #31

0

Afficher le fichier

Fichier : elasticbu.py Projet : diguida/hltd

class elasticBandBU:

    def __init__(self,conf,runnumber,startTime,runMode=True,nsslock=None,box_version=None):
        self.logger = logging.getLogger(self.__class__.__name__)
        self.conf=conf
        self.es_server_url=conf.elastic_runindex_url
        self.runindex_write="runindex_"+conf.elastic_runindex_name+"_write"
        self.runindex_read="runindex_"+conf.elastic_runindex_name+"_read"
        self.runindex_name="runindex_"+conf.elastic_runindex_name
        self.boxinfo_write="boxinfo_"+conf.elastic_runindex_name+"_write"
        self.boxinfo_read="boxinfo_"+conf.elastic_runindex_name+"_read"
        self.boxinfo_name="boxinfo_"+conf.elastic_runindex_name
        self.boxdoc_version=box_version
        self.runnumber = str(runnumber)
        self.startTime = startTime
        self.host = os.uname()[1]
        self.stopping=False
        self.threadEvent = threading.Event()
        self.runMode=runMode
        self.boxinfoFUMap = {}
        self.ip_url=None
        self.nsslock=nsslock
        self.updateIndexMaybe(self.runindex_name,self.runindex_write,self.runindex_read,mappings.central_es_settings,mappings.central_runindex_mapping)
        self.updateIndexMaybe(self.boxinfo_name,self.boxinfo_write,self.boxinfo_read,mappings.central_es_settings,mappings.central_boxinfo_mapping)
        self.black_list=None
        if self.conf.instance=='main':
            self.hostinst = self.host
        else:
            self.hostinst = self.host+'_'+self.conf.instance

        #write run number document
        if runMode == True and self.stopping==False:
            document = {}
            document['runNumber'] = self.runnumber
            document['startTime'] = startTime
            documents = [document]
            self.index_documents('run',documents)
            #except ElasticHttpError as ex:
            #    self.logger.info(ex)
            #    pass


    def updateIndexMaybe(self,index_name,alias_write,alias_read,settings,mapping):
        connectionAttempts=0
        retry=False
        while True:
            if self.stopping:break
            connectionAttempts+=1
            try:
                if retry or self.ip_url==None:
                    self.ip_url=getURLwithIP(self.es_server_url,self.nsslock)
                    self.es = ElasticSearch(self.ip_url,timeout=20)

                #check if runindex alias exists
                if requests.get(self.ip_url+'/_alias/'+alias_write).status_code == 200: 
                    self.logger.info('writing to elastic index '+alias_write + ' on '+self.es_server_url+' - '+self.ip_url )
                    self.createDocMappingsMaybe(alias_write,mapping)
                    break
                else:
                    time.sleep(.5)
                    if (connectionAttempts%10)==0:
                        self.logger.error('unable to access to elasticsearch alias ' + alias_write + ' on '+self.es_server_url+' / '+self.ip_url)
                    continue
            except ElasticHttpError as ex:
                #es error, retry
                self.logger.error(ex)
                if self.runMode and connectionAttempts>100:
                    self.logger.error('elastic (BU): exiting after 100 ElasticHttpError reports from '+ self.es_server_url)
                    sys.exit(1)
                elif self.runMode==False and connectionAttempts>10:
                    self.threadEvent.wait(60)
                else:
                    self.threadEvent.wait(1)
                retry=True
                continue

            except (socket.gaierror,ConnectionError,Timeout,RequestsConnectionError,RequestsTimeout) as ex:
                #try to reconnect with different IP from DNS load balancing
                if self.runMode and connectionAttempts>100:
                   self.logger.error('elastic (BU): exiting after 100 connection attempts to '+ self.es_server_url)
                   sys.exit(1)
                elif self.runMode==False and connectionAttempts>10:
                   self.threadEvent.wait(60)
                else:
                   self.threadEvent.wait(1)
                retry=True
                continue

    def createDocMappingsMaybe(self,index_name,mapping):
        #update in case of new documents added to mapping definition
        for key in mapping:
            doc = {key:mapping[key]}
            res = requests.get(self.ip_url+'/'+index_name+'/'+key+'/_mapping')
            #only update if mapping is empty
            if res.status_code==200:
                if res.content.strip()=='{}':
                    self.logger.info('inserting new mapping for '+str(key))
                    requests.post(self.ip_url+'/'+index_name+'/'+key+'/_mapping',json.dumps(doc))
                else:
                    #still check if number of properties is identical in each type
                    inmapping = json.loads(res.content)
                    for indexname in inmapping:
                        properties = inmapping[indexname]['mappings'][key]['properties']

                        self.logger.info('checking mapping '+ indexname + '/' + key + ' which has '
                            + str(len(mapping[key]['properties'])) + '(index:' + str(len(properties)) + ') entries..')
                        for pdoc in mapping[key]['properties']:
                            if pdoc not in properties:
                                self.logger.info('inserting mapping for ' + str(key) + ' which is missing mapping property ' + str(pdoc))
                                requests.post(self.ip_url+'/'+index_name+'/'+key+'/_mapping',json.dumps(doc))
                                break
            else:
                self.logger.warning('requests error code '+res.status_code+' in mapping request')

    def read_line(self,fullpath):
        with open(fullpath,'r') as fp:
            return fp.readline()
    
    def elasticize_modulelegend(self,fullpath):

        self.logger.info(os.path.basename(fullpath))
        document = {}
        document['_parent']= self.runnumber
        document['id']= "microstatelegend_"+self.runnumber
        if fullpath.endswith('.jsn'):
          try:
            with open(fullpath,'r') as fp:
              doc = json.load(fp)
              document['stateNames'] = doc['names']
              try:document['reserved'] = doc['reserved']
              except:document['reserved'] = 33
              try:document['special'] = doc['special']
              except:document['special'] = 7
              nstring = ""
              cnt = 0
              outputcnt = 0
              #fill in also old format for now
              for sname in doc['names']:
                  nstring+= str(cnt) + "=" + sname + " "
                  cnt+=1
                  if sname.startswith('hltOutput'):outputcnt+=1
              try:document['output'] = doc['output']
              except:document['output']=outputcnt
              document['names'] = nstring
          except Exception as ex:
            self.logger.warning("can not parse "+fullpath + ' ' + str(ex))
        else:
          #old format
          stub = self.read_line(fullpath)
          document['names']= self.read_line(fullpath)
          document['reserved'] = 33
          document['special'] = 7
          outputcnt=0
          for sname in document['names'].split():
            if "=hltOutput" in sname: outputcnt+=1
          document['output'] = outputcnt
          document['stateNames']=[]
          nameTokens = document['names'].split()
          for nameToken in nameTokens:
           if '=' in nameToken:
             idx,sn = nameToken.split('=')
             document["stateNames"].append( sn )
          
        documents = [document]
        return self.index_documents('microstatelegend',documents)


    def elasticize_pathlegend(self,fullpath):
        self.logger.info(os.path.basename(fullpath))
        document = {}
        document['_parent']= self.runnumber
        document['id']= "pathlegend_"+self.runnumber
        if fullpath.endswith('.jsn'):
          try:
            with open(fullpath,'r') as fp:
              doc = json.load(fp)
              document['stateNames'] = doc['names']
              document['reserved'] = doc['reserved']
              #put old name format value
              nstring=""
              cnt=0
              for sname in doc['names']:
                nstring+= str(cnt) + "=" + sname + " "
                cnt+=1
              document['names'] = nstring
          except Exception as ex:
            self.logger.warning("can not parse "+fullpath)
        else:
          stub = self.read_line(fullpath)
          document['names']= self.read_line(fullpath)
        documents = [document]
        return self.index_documents('pathlegend',documents)

    def elasticize_stream_label(self,infile):
        #elasticize stream name information
        self.logger.info(infile.filepath)
        document = {}
        document['_parent']= self.runnumber
        document['stream']=infile.stream[6:]
        document['id']=infile.basename
        return self.index_documents('stream_label',[document])

    def elasticize_runend_time(self,endtime):

        self.logger.info(str(endtime)+" going into buffer")
        document = {}
        document['runNumber'] = self.runnumber
        document['startTime'] = self.startTime
        document['endTime'] = endtime
        documents = [document]
        self.index_documents('run',documents)

    def elasticize_resource_summary(self,jsondoc):
        self.logger.debug('injecting resource summary document')
        jsondoc['appliance']=self.host
        self.index_documents('resource_summary',[jsondoc],bulk=False)

    def elasticize_box(self,infile):

        basename = infile.basename
        self.logger.debug(basename)
        current_time = time.time()

        if infile.data=={}:return

        bu_doc=False
        if basename.startswith('bu') or basename.startswith('dvbu'):
            bu_doc=True

        #check box file against blacklist
        if bu_doc or self.black_list==None:
            self.black_list=[]

            try:
                with open(os.path.join(self.conf.watch_directory,'appliance','blacklist'),"r") as fi:
                    try:
                        self.black_list = json.load(fi)
                    except ValueError:
                        #file is being written or corrupted
                        return
            except:
                #blacklist file is not present, do not filter
                pass

        if basename in self.black_list:return

        if bu_doc==False:
            try:
                if self.boxdoc_version<infile.data['version']:
                    self.logger.info('skipping '+basename+' box file version '+str(infile.data['version'])+' which is newer than '+str(self.boxdoc_version))
                    return;
            except:
                self.logger.warning("didn't find version field in box file "+basename)
                return
            try:
                self.boxinfoFUMap[basename] = [infile.data,current_time]
            except Exception as ex:
                self.logger.warning('box info not injected: '+str(ex))
                return
        try:
            document = infile.data
            #unique id for separate instances
            if bu_doc:
                document['id']=self.hostinst
            else:
                document['id']=basename

            document['activeRuns'] = str(document['activeRuns']).strip('[]')
            document['appliance']=self.host
            document['instance']=self.conf.instance
            if bu_doc==True:
              document['blacklist']=self.black_list
            #only here
            document['host']=basename
            try:document.pop('version')
            except:pass
            self.index_documents('boxinfo',[document])
        except Exception as ex:
            self.logger.warning('box info not injected: '+str(ex))
            return

    def elasticize_eols(self,infile):
        basename = infile.basename
        self.logger.info(basename)
        data = infile.data['data']
        data.insert(0,infile.mtime)
        data.insert(0,infile.ls[2:])
        
        values = [int(f) if f.isdigit() else str(f) for f in data]
        try:
            keys = ["ls","fm_date","NEvents","NFiles","TotalEvents","NLostEvents","NBytes"]
            document = dict(zip(keys, values))
        except:
            #try without NBytes
            keys = ["ls","fm_date","NEvents","NFiles","TotalEvents","NLostEvents"]
            document = dict(zip(keys, values))

        document['id'] = infile.name+"_"+self.host
        document['_parent']= self.runnumber
        document['appliance']=self.host
        documents = [document]
        self.index_documents('eols',documents)

    def index_documents(self,name,documents,bulk=True):
        attempts=0
        destination_index = ""
        is_box=False
        if name.startswith("boxinfo") or name=='resource_summary':
          destination_index = self.boxinfo_write
          is_box=True
        else:
          destination_index = self.runindex_write
        while True:
            attempts+=1
            try:
                if bulk:
                    self.es.bulk_index(destination_index,name,documents)
                else:
                    self.es.index(destination_index,name,documents[0])
                return True
            except ElasticHttpError as ex:
                if attempts<=1:continue
                self.logger.error('elasticsearch HTTP error'+str(ex)+'. skipping document '+name)
                if is_box==True:break
                #self.logger.exception(ex)
                return False
            except (socket.gaierror,ConnectionError,Timeout) as ex:
                if attempts>100 and self.runMode:
                    raise(ex)
                self.logger.error('elasticsearch connection error' + str(ex)+'. retry.')
                if self.stopping:return False
                ip_url=getURLwithIP(self.es_server_url,self.nsslock)
                self.es = ElasticSearch(ip_url,timeout=20)
                time.sleep(0.1)
                if is_box==True:break
        return False

Exemple #32

0

Afficher le fichier

Fichier : es_test.py Projet : vnisor/cotl

from pyelasticsearch.client import ElasticSearch
import sys
# by default we connect to localhost:9200

if __name__ == "__main__":
    if len(sys.argv) != 4:
        print "Usage: [*.py] [lat] [lon] [R]"
        sys.exit(0)
    es = ElasticSearch('http://localhost:9200/')
    lat = float(sys.argv[1])
    lon = float(sys.argv[2])
    r = float(sys.argv[3])
    print lat, lon, r
    query = {
        "from":
        0,
        "size":
        10,
        'query': {
            "match_all": {}
        },
        "filter": {
            "geo_distance": {
                "distance": str(r) + 'km',
                "location": {
                    "lat": lat,
                    "lon": lon
                }
            }
        },
        "sort": [{

Exemple #33

0

Afficher le fichier

Fichier : elasticbu.py Projet : cmsdaq/hltd

    def index_documents(self,name,documents,doc_id=None,doc_params=None,bulk=True,overwrite=True,update_only=False,retry_on_conflict=0,script=False):

        if name=='fu-box-status' or name.startswith("boxinfo") or name=='resource_summary':
            destination_index = self.boxinfo_write
            is_box=True
        else:
            destination_index = self.runindex_write
            is_box=False
        attempts=0
        while True:
            attempts+=1
            try:
                if bulk:
                    self.es.bulk_index(destination_index,name,documents)
                else:
                    if doc_id:
                      if update_only:
                        if script:
                          self.es.update(index=destination_index,doc_type=name,id=doc_id,script=documents[0],upsert=False,retry_on_conflict=retry_on_conflict)
                        else:
                          self.es.update(index=destination_index,doc_type=name,id=doc_id,doc=documents[0],upsert=False,retry_on_conflict=retry_on_conflict)
                      else:
                        #overwrite existing can be used with id specified
                        if doc_params:
                          self.es.index(destination_index,name,documents[0],doc_id,parent=doc_params['parent'],overwrite_existing=overwrite)
                        else:
                          self.es.index(destination_index,name,documents[0],doc_id,overwrite_existing=overwrite)
                    else:
                        self.es.index(destination_index,name,documents[0])
                return True

            except ElasticHttpError as ex:
                if name=='run' and ex[0]==409: #create failed because overwrite was forbidden
                    return (False,ex[0])

                if ex[0]==429:
                  if attempts<10 and not is_box:
                    self.logger.warning('elasticsearch HTTP error 429'+str(ex)+'. retrying..')
                    time.sleep(.1)
                    continue
                else:
                  if attempts<=1 and not is_box:continue

                if is_box:
                    self.logger.warning('elasticsearch HTTP error '+str(ex)+'. skipping document '+name)
                else:
                    self.logger.error('elasticsearch HTTP error '+str(ex)+'. skipping document '+name)
                return False
            except (socket.gaierror,ConnectionError,Timeout) as ex:
                if attempts>100 and self.runMode:
                    raise(ex)
                if is_box or attempts<=1:
                    self.logger.warning('elasticsearch connection error' + str(ex)+'. retry.')
                elif (attempts-2)%10==0:
                    self.logger.error('elasticsearch connection error' + str(ex)+'. retry.')
                if self.stopping:return False
                ip_url=getURLwithIP(self.es_server_url,self.nsslock)
                self.es = ElasticSearch(ip_url,timeout=20)
                time.sleep(0.1)
                if is_box==True:#give up on too many box retries as they are indexed again every 5 seconds
                  break
        return False

Exemple #34

0

Afficher le fichier

class elasticBandBU:
    def __init__(self, conf, runnumber, startTime, runMode=True, nsslock=None):
        self.logger = logging.getLogger(self.__class__.__name__)
        self.conf = conf
        self.es_server_url = conf.elastic_runindex_url
        self.runindex_write = "runindex_" + conf.elastic_runindex_name + "_write"
        self.runindex_read = "runindex_" + conf.elastic_runindex_name + "_read"
        self.runindex_name = "runindex_" + conf.elastic_runindex_name
        self.boxinfo_write = "boxinfo_" + conf.elastic_runindex_name + "_write"
        self.boxinfo_read = "boxinfo_" + conf.elastic_runindex_name + "_read"
        self.boxinfo_name = "boxinfo_" + conf.elastic_runindex_name
        self.runnumber = str(runnumber)
        self.startTime = startTime
        self.host = os.uname()[1]
        self.stopping = False
        self.threadEvent = threading.Event()
        self.runMode = runMode
        self.boxinfoFUMap = {}
        self.ip_url = None
        self.nsslock = nsslock
        self.updateIndexMaybe(self.runindex_name, self.runindex_write,
                              self.runindex_read, mappings.central_es_settings,
                              mappings.central_runindex_mapping)
        self.updateIndexMaybe(self.boxinfo_name, self.boxinfo_write,
                              self.boxinfo_read, mappings.central_es_settings,
                              mappings.central_boxinfo_mapping)
        self.black_list = None
        if self.conf.instance == 'main':
            self.hostinst = self.host
        else:
            self.hostinst = self.host + '_' + self.conf.instance

        #write run number document
        if runMode == True and self.stopping == False:
            document = {}
            document['runNumber'] = self.runnumber
            document['startTime'] = startTime
            documents = [document]
            self.index_documents('run', documents)
            #except ElasticHttpError as ex:
            #    self.logger.info(ex)
            #    pass

    def updateIndexMaybe(self, index_name, alias_write, alias_read, settings,
                         mapping):
        connectionAttempts = 0
        retry = False
        while True:
            if self.stopping: break
            connectionAttempts += 1
            try:
                if retry or self.ip_url == None:
                    self.ip_url = getURLwithIP(self.es_server_url,
                                               self.nsslock)
                    self.es = ElasticSearch(self.ip_url,
                                            timeout=20,
                                            revival_delay=60)

                #check if runindex alias exists
                if requests.get(self.es_server_url + '/_alias/' +
                                alias_write).status_code == 200:
                    self.logger.info('writing to elastic index ' +
                                     alias_write + ' on ' +
                                     self.es_server_url + ' - ' + self.ip_url)
                    self.createDocMappingsMaybe(alias_write, mapping)
                    break
                else:
                    time.sleep(.5)
                    if (connectionAttempts % 10) == 0:
                        self.logger.error(
                            'unable to access to elasticsearch alias ' +
                            alias_write + ' on ' + self.es_server_url + ' / ' +
                            self.ip_url)
                    continue
            except ElasticHttpError as ex:
                #es error, retry
                self.logger.error(ex)
                if self.runMode and connectionAttempts > 100:
                    self.logger.error(
                        'elastic (BU): exiting after 100 ElasticHttpError reports from '
                        + self.es_server_url)
                    sys.exit(1)
                elif self.runMode == False and connectionAttempts > 10:
                    self.threadEvent.wait(60)
                else:
                    self.threadEvent.wait(1)
                retry = True
                continue

            except (socket.gaierror, ConnectionError, Timeout) as ex:
                #try to reconnect with different IP from DNS load balancing
                if self.runMode and connectionAttempts > 100:
                    self.logger.error(
                        'elastic (BU): exiting after 100 connection attempts to '
                        + self.es_server_url)
                    sys.exit(1)
                elif self.runMode == False and connectionAttempts > 10:
                    self.threadEvent.wait(60)
                else:
                    self.threadEvent.wait(1)
                retry = True
                continue

    def createDocMappingsMaybe(self, index_name, mapping):
        #update in case of new documents added to mapping definition
        for key in mapping:
            doc = {key: mapping[key]}
            res = requests.get(self.ip_url + '/' + index_name + '/' + key +
                               '/_mapping')
            #only update if mapping is empty
            if res.status_code == 200:
                if res.content.strip() == '{}':
                    requests.post(
                        self.ip_url + '/' + index_name + '/' + key +
                        '/_mapping', json.dumps(doc))
                else:
                    #still check if number of properties is identical in each type
                    inmapping = json.loads(res.content)
                    for indexname in inmapping:
                        properties = inmapping[indexname]['mappings'][key][
                            'properties']
                        #should be size 1
                        for pdoc in mapping[key]['properties']:
                            if pdoc not in properties:
                                requests.post(
                                    self.ip_url + '/' + index_name + '/' +
                                    key + '/_mapping', json.dumps(doc))
                                break

    def read_line(self, fullpath):
        with open(fullpath, 'r') as fp:
            return fp.readline()

    def elasticize_modulelegend(self, fullpath):

        self.logger.info(os.path.basename(fullpath))
        stub = self.read_line(fullpath)
        document = {}
        document['_parent'] = self.runnumber
        document['id'] = "microstatelegend_" + self.runnumber
        document['names'] = self.read_line(fullpath)
        documents = [document]
        return self.index_documents('microstatelegend', documents)

    def elasticize_pathlegend(self, fullpath):

        self.logger.info(os.path.basename(fullpath))
        stub = self.read_line(fullpath)
        document = {}
        document['_parent'] = self.runnumber
        document['id'] = "pathlegend_" + self.runnumber
        document['names'] = self.read_line(fullpath)
        documents = [document]
        return self.index_documents('pathlegend', documents)

    def elasticize_runend_time(self, endtime):

        self.logger.info(str(endtime) + " going into buffer")
        document = {}
        document['runNumber'] = self.runnumber
        document['startTime'] = self.startTime
        document['endTime'] = endtime
        documents = [document]
        self.index_documents('run', documents)

    def elasticize_box(self, infile):

        basename = infile.basename
        self.logger.debug(basename)
        current_time = time.time()

        if infile.data == {}: return

        bu_doc = False
        if basename.startswith('bu') or basename.startswith('dvbu'):
            bu_doc = True

        #check box file against blacklist
        if bu_doc or self.black_list == None:
            self.black_list = []

            try:
                with open(
                        os.path.join(self.conf.watch_directory, 'appliance',
                                     'blacklist'), "r") as fi:
                    try:
                        self.black_list = json.load(fi)
                    except ValueError:
                        #file is being written or corrupted
                        return
            except:
                #blacklist file is not present, do not filter
                pass

        if basename in self.black_list: return

        if bu_doc == False:
            try:
                self.boxinfoFUMap[basename] = [infile.data, current_time]
            except Exception as ex:
                self.logger.warning('box info not injected: ' + str(ex))
                return
        try:
            document = infile.data
            #unique id for separate instances
            if bu_doc:
                document['id'] = self.hostinst
            else:
                document['id'] = basename

            #both here and in "boxinfo_appliance"
            document['appliance'] = self.host
            document['instance'] = self.conf.instance
            #only here
            document['host'] = basename

            self.index_documents('boxinfo', [document])
        except Exception as ex:
            self.logger.warning('box info not injected: ' + str(ex))
            return
        if bu_doc:
            try:
                document = infile.data
                try:
                    document.pop('id')
                except:
                    pass
                try:
                    document.pop('host')
                except:
                    pass
                #aggregation from FUs
                document['idles'] = 0
                document['used'] = 0
                document['broken'] = 0
                document['quarantined'] = 0
                document['cloud'] = 0
                document['usedDataDir'] = 0
                document['totalDataDir'] = 0
                document['hosts'] = [basename]
                document['blacklistedHosts'] = []
                for key in self.boxinfoFUMap:
                    dpair = self.boxinfoFUMap[key]
                    d = dpair[0]
                    #check if entry is not older than 10 seconds
                    if current_time - dpair[1] > 10: continue
                    document['idles'] += int(d['idles'])
                    document['used'] += int(d['used'])
                    document['broken'] += int(d['broken'])
                    document['quarantined'] += int(d['quarantined'])
                    document['cloud'] += int(d['cloud'])
                    document['usedDataDir'] += int(d['usedDataDir'])
                    document['totalDataDir'] += int(d['totalDataDir'])
                    document['hosts'].append(key)
                for blacklistedHost in self.black_list:
                    document['blacklistedHosts'].append(blacklistedHost)
                self.index_documents('boxinfo_appliance', [document],
                                     bulk=False)
            except Exception as ex:
                #in case of malformed box info
                self.logger.warning('box info not injected: ' + str(ex))
                return

    def elasticize_eols(self, infile):
        basename = infile.basename
        self.logger.info(basename)
        data = infile.data['data']
        data.append(infile.mtime)
        data.append(infile.ls[2:])

        values = [int(f) if f.isdigit() else str(f) for f in data]
        keys = ["NEvents", "NFiles", "TotalEvents", "fm_date", "ls"]
        document = dict(zip(keys, values))

        document['id'] = infile.name + "_" + os.uname()[1]
        document['_parent'] = self.runnumber
        documents = [document]
        self.index_documents('eols', documents)

    def index_documents(self, name, documents, bulk=True):
        attempts = 0
        destination_index = ""
        is_box = False
        if name.startswith("boxinfo"):
            destination_index = self.boxinfo_write
            is_box = True
        else:
            destination_index = self.runindex_write
        while True:
            attempts += 1
            try:
                if bulk:
                    self.es.bulk_index(destination_index, name, documents)
                else:
                    self.es.index(destination_index, name, documents[0])
                return True
            except ElasticHttpError as ex:
                if attempts <= 1: continue
                self.logger.error(
                    'elasticsearch HTTP error. skipping document ' + name)
                if is_box == True: break
                #self.logger.exception(ex)
                return False
            except (socket.gaierror, ConnectionError, Timeout) as ex:
                if attempts > 100 and self.runMode:
                    raise (ex)
                self.logger.error('elasticsearch connection error. retry.')
                if self.stopping: return False
                time.sleep(0.1)
                ip_url = getURLwithIP(self.es_server_url, self.nsslock)
                self.es = ElasticSearch(ip_url, timeout=20, revival_delay=60)
                if is_box == True: break
        return False

Exemple #35

0

Afficher le fichier

Fichier : elasticBand.py Projet : zazasa/hltd

    def __init__(self,es_server_url,runstring,indexSuffix,monBufferSize,fastUpdateModulo):
        self.logger = logging.getLogger(self.__class__.__name__)
        self.istateBuffer = []  
        self.prcinBuffer = {}   # {"lsX": doclist}
        self.prcoutBuffer = {}
        self.fuoutBuffer = {}
        self.es = ElasticSearch(es_server_url) 
        self.settings = {
            "analysis":{
                "analyzer": {
                    "prefix-test-analyzer": {
                        "type": "custom",
                        "tokenizer": "prefix-test-tokenizer"
                    }
                },
                "tokenizer": {
                    "prefix-test-tokenizer": {
                        "type": "path_hierarchy",
                        "delimiter": "_"
                    }
                }
            },
            "index":{
                'number_of_shards' : 16,
                'number_of_replicas' : 1
            }
        }

        self.run_mapping = {
            'prc-i-state' : {
                'properties' : {
                    'macro'     : {'type' : 'integer'},
                    'mini'      : {'type' : 'integer'},
                    'micro'     : {'type' : 'integer'},
                    'tp'        : {'type' : 'double' },
                    'lead'      : {'type' : 'double' },
                    'nfiles'    : {'type' : 'integer'},
                    'fm_date'   : {'type' : 'date'   }
                },
                '_timestamp' : { 
                    'enabled'   : True,
                    'store'     : "yes",
                    "path"      : "fm_date"
                },
                '_ttl'       : { 'enabled' : True,                             
                                 'default' :  '5m'
                } 
            },
            'prc-s-state' : {
                'properties' : {
                    'macro'  : {'type' : 'integer'},
                    'mini'   : {'type' : 'integer'},
                    'micro'  : {'type' : 'integer'},
                    'tp'     : {'type' : 'double'},
                    'lead'   : {'type' : 'double'},
                    'nfiles' : {'type' : 'integer'},            
                    'ls'     : {'type' : 'integer'},
                    'process': {'type' : 'string'}
                },
            },
            'fu-s-state' : {
                'properties' : {
                    'macro'  : {'type' : 'integer'},
                    'mini'   : {'type' : 'integer'},
                    'micro'  : {'type' : 'integer'},
                    'tp'     : {'type' : 'double'},
                    'lead'   : {'type' : 'double'},
                    'nfiles' : {'type' : 'integer'},            
                    'ls'     : {'type' : 'integer'},
                    'machine': {'type' : 'string'}
                }
            },
            'prc-out': {
                '_routing' :{
                    'required' : True,
                    'path'     : 'source'
                },
                'properties' : {
                    #'definition': {'type': 'string'},
                    'data' : { 'properties' : {
                            'in' : { 'type' : 'integer'},
                            'out': { 'type' : 'integer'},
                            'file': { 'type' : 'string','index' : 'not_analyzed'}
                            }           
                    },
                    'ls' : { 
                        'type' : 'integer',
                        'store': "yes"
                    },
                    'stream' : {'type' : 'string','index' : 'not_analyzed'},
                    'source' : {
                        'type' : 'string',
                        'index_analyzer': 'prefix-test-analyzer',
                        'search_analyzer': "keyword",
                        'store' : "yes",
                        'index' : "analyzed"
                    }
                },
                '_timestamp' : { 
                    'enabled' : True,
                    'store'   : "yes"
                }
            },
            'prc-in': {
                '_routing' :{
                    'required' : True,
                    'path'     : 'dest'
                },
                'properties' : {
                    #'definition': {'type': 'string',"index" : "not_analyzed"},
                    'data' : { 'properties' : {
                            'out'    : { 'type' : 'integer'}
                            }
                    },
                    'ls'     : { 
                        'type' : 'integer',
                        'store': 'yes'
                    },
                    'index'  : { 'type' : 'integer' },
                    'source' : { 'type' : 'string'  },
                    'dest' : {
                        'type' : 'string',
                        'index_analyzer': 'prefix-test-analyzer',
                        'search_analyzer': "keyword",
                        'store' : "yes",
                        'index' : "analyzed",
                        },
                    'process' : { 'type' : 'integer' }
                },
                '_timestamp' : { 
                    'enabled' : True,
                    'store'   : "yes"
                }
            },
            'fu-out': {
                '_routing' :{
                    'required' : True,
                    'path'     : 'source'
                },
                'properties' : {
                    #'definition': {'type': 'string',"index" : "not_analyzed"},
                    'data' : { 'properties' : {
                            'in' : { 'type' : 'integer'},
                            'out': { 'type' : 'integer'},
                            'errorEvents' : {'type' : 'integer'},
                            'returnCodeMask': {'type':'string',"index" : "not_analyzed"},
                            'fileSize' : {'type':'long'},
                            'files': {
                                'properties' : {
                                    'name' : { 'type' : 'string',"index" : "not_analyzed"}
                                    }
                                }
                             }
                    },
                    'ls' : { 'type' : 'integer' },
                    'stream' : {'type' : 'string','index' : 'not_analyzed'},
                    'source' : {
                        'type' : 'string',
                        'index_analyzer': 'prefix-test-analyzer',
                        'search_analyzer': "keyword"
                    }
                },
                '_timestamp' : { 
                    'enabled' : True,
                    'store'   : "yes"
                }
            },
            'fu-complete' : {
                'properties' : {
                    'host'     : {'type' : 'string'},
                    'fm_date'   : {'type' : 'date' }
                },
                '_timestamp' : { 
                    'enabled'   : True,
                    'store'     : "yes",
                    "path"      : "fm_date"
                },
            },
            'bu-out': {
                'properties' : {
                    #'definition': {'type': 'string',"index" : "not_analyzed"},
                    'out': { 'type' : 'integer'},
                    'ls' : { 'type' : 'integer' },
                    'source' : {'type' : 'string'}#,"index" : "not_analyzed"}
                }
            },
            'cmsswlog' : {
                '_timestamp' : { 
                    'enabled'   : True,
                    'store'     : "yes"
                },
                '_ttl'       : { 'enabled' : True,
                              'default' :  '30d'}
                ,
                'properties' : {
                    'host'      : {'type' : 'string'},
                    'pid'       : {'type' : 'integer'},
                    'type'      : {'type' : 'string',"index" : "not_analyzed"},
                    'severity'  : {'type' : 'string',"index" : "not_analyzed"},
                    'severityVal'  : {'type' : 'integer'},
                    'category'  : {'type' : 'string'},

                    'fwkState'     : {'type' : 'string',"index" : "not_analyzed"},
                    'module'     : {'type' : 'string',"index" : "not_analyzed"},
                    'moduleInstance'     : {'type' : 'string',"index" : "not_analyzed"},
                    'moduleCall'     : {'type' : 'string',"index" : "not_analyzed"},
                    'lumi'     : {'type' : 'integer'},
                    'eventInPrc'     : {'type' : 'long'},

                    'message'   : {'type' : 'string'},#,"index" : "not_analyzed"},
                    'lexicalId' : {'type' : 'string',"index" : "not_analyzed"},
                    'msgtime' : {'type' : 'date','format':'dd-MMM-YYYY HH:mm:ss'},
                    'msgtimezone' : {'type' : 'string'}
                    #'context'   : {'type' : 'string'}
                 }
            }
        }
        self.run = runstring
        self.monBufferSize = monBufferSize
        self.fastUpdateModulo = fastUpdateModulo
        self.indexName = runstring + "_"+indexSuffix
        try:
            self.es.create_index(self.indexName, settings={ 'settings': self.settings, 'mappings': self.run_mapping })
        except ElasticHttpError as ex:
#            print "Index already existing - records will be overridden"
            #this is normally fine as the index gets created somewhere across the cluster
            pass

Exemple #36

0

Afficher le fichier

Fichier : collate.py Projet : diguida/hltd

class Collation:
    def __init__(self, es_server_url):
        self.server = ElasticSearch(es_server_url)
        self.datadict = {
            "prc-out": {
                "lookup": Query("prc-out", "source"),
                "action": {
                    "definition": Aggregator("drop"),
                    "data": Aggregator({"in": Aggregator("add"), "out": Aggregator("add"), "file": Aggregator("cat")}),
                    "ls": Aggregator("check"),
                    "stream": Aggregator("check"),
                    "source": Aggregator("match"),
                },
            },
            "prc-in": {
                "lookup": Query("prc-in", "dest"),
                "action": {
                    "definition": Aggregator("drop"),
                    "data": Aggregator({"out": Aggregator("add")}),
                    "ls": Aggregator("check"),
                    "index": Aggregator("cat"),
                    "source": Aggregator("check"),
                    "dest": Aggregator("check"),
                    "process": Aggregator("cat"),
                },
            },
            "prc-s-state": {
                "lookup": Query("prc-s-state"),
                "action": {
                    "macro": Aggregator("histoadd"),
                    "mini": Aggregator("histoadd"),
                    "micro": Aggregator("histoadd"),
                    "tp": Aggregator("add"),
                    "lead": Aggregator("avg"),
                    "nfiles": Aggregator("add"),
                    "ls": Aggregator("check"),
                    "process": Aggregator("cat"),
                },
            },
        }

    def lookup(self, doctype):
        return self.datadict[doctype]["lookup"]

    def action(self, doctype):
        return self.datadict[doctype]["action"]

    # print datadict[type]['lookup']
    def search(self, ind, doctype, ls, stream=None):
        if stream:
            result = self.server.search(self.lookup(doctype)(ls, stream), index=ind)
        else:
            result = self.server.search(self.lookup(doctype)(ls), index=ind)
        return result

    def collate(self, ind, doctype, ls, stream=None):
        result = self.search(ind, doctype, ls, stream)
        for element in result["hits"]["hits"]:
            for k, v in element["_source"].items():
                self.action(doctype)[k](v)
        retval = dict((k, v.value()) for k, v in self.action(doctype).items())
        for v in self.action(doctype).values():
            v.reset()
        return retval

    def refresh(self, ind):
        self.server.refresh(ind)

    def stash(self, ind, doctype, doc):
        result = self.server.index(ind, doctype, doc)
        return result

Exemple #37

0

Afficher le fichier

Fichier : elasticsearch.py Projet : theorm/puerh

 def __init__(self, url='http://localhost:9200/', index='events'):
     self._es = ElasticSearch(url)
     self._es.json_encoder = ESJSONEncoder
     self._index = index

Exemple #38

0

Afficher le fichier

Fichier : setupES.py Projet : cmsdaq/hltd

def setupES(es_server_url='http://localhost:9200',deleteOld=1,doPrint=False,overrideTests=False, forceReplicas=-1, forceShards=-1, create_index_name=None,subsystem="cdaq"):

    #ip_url=getURLwithIP(es_server_url)
    es = ElasticSearch(es_server_url,timeout=5)

    #list_template
    templateList = es.send_request('GET', ['_template'])

    TEMPLATES = ["runappliance_"+subsystem]
    loaddoc = None
    for template_name in TEMPLATES:
        template_label = template_name.split('_')[0]
        if template_name not in templateList:
            printout(template_name + " template not present. It will be created. ",doPrint,False)
            loaddoc = create_template(es,template_name,template_label,subsystem,forceReplicas,forceShards)
        else:
            loaddoc = create_template(es,None,template_label,subsystem,forceReplicas,forceShards,send=False)
            norm_name = convert(templateList[template_name])
            if deleteOld==0:
                printout(template_name+" already exists. Add 'replace' parameter to update if different, or forceupdate to always  update.",doPrint,False)
            else:
                printout(template_name+" already exists.",doPrint,False)
                if loaddoc!=None:
                    mappingSame =  norm_name['mappings']==loaddoc['mappings']
                    #settingSame = norm_name['settings']==loaddoc['settings']
                    settingsSame=True
                    #convert to int before comparison
                    if int(norm_name['settings']['index']['number_of_replicas'])!=int(loaddoc['settings']['index']['number_of_replicas']):
                        settingsSame=False
                    if int(norm_name['settings']['index']['number_of_shards'])!=int(loaddoc['settings']['index']['number_of_shards']):
                        settingsSame=False
                    #add more here if other settings need to be added
                    if 'translog' not in norm_name['settings']['index'] or norm_name['settings']['index']['translog']!=loaddoc['settings']['index']['translog']:
                        settingsSame=False
                    #currently analyzer settings are not verified

                    if not (mappingSame and settingsSame) or deleteOld>1:
                        #test is override
                        if overrideTests==False:
                            try:
                                if norm_name['settings']['test']==True:
                                    printout("Template test setting found, skipping update...",doPrint,True)
                                    break
                            except:pass
                        printout("Updating "+template_name+" ES template",doPrint,True)
                        create_template(es,template_name,template_label,subsystem,forceReplicas,forceShards)
                    else:
                        printout('runappliance ES template is up to date',doPrint,True)

    #create index
    if create_index_name:
        if loaddoc:
            try:
                c_res = es.send_request('PUT', [create_index_name], body = loaddoc)
                if c_res!={'acknowledged':True}:
                    printout("Result of index " + create_index_name + " create request: " + str(c_res),doPrint,True )
            except ElasticHttpError, ex:
                if ex[1]['type']=='index_already_exists_exception':
                    #this is for index pre-creator
                    printout("Attempting to intialize already existing index "+create_index_name,doPrint,True)
                    try:
                        doc_resp = es.send_request('GET', ['_cat','indices',create_index_name],query_params={'h':'status'})
                        if doc_resp.strip('\n')=='close':
                            printout("Index "+create_index_name+ " is already closed! Index will be reopened",doPrint,True)
                            c_res = es.send_request('POST', [create_index_name,'_open'])
                    except ElasticHttpError as ex:
                        printout("setupES got ElasticHttpError getting index open/close state: "+str(ex),doPrint,True)
                    except Exception as ex:
                        printout("setupEs got Exception getting index open/closed state: "+str(ex),doPrint,True)
            except Exception as ex:
                #if type(ex)==RemoteTransportException: print "a",type(ex)
                printout("Index not created: "+str(ex),doPrint,True)

Exemple #39

0

Afficher le fichier

Fichier : elasticsearch.py Projet : theorm/puerh

 def __init__(self, url='http://localhost:9200/', index='events'):
     self._es = ElasticSearch(url)
     self._index = index

Exemple #40

0

Afficher le fichier

Fichier : elasticbu.py Projet : zazasa/hltd

 def resetURL(url):
     self.es = None
     self.es = ElasticSearch(url)

Exemple #41

0

Afficher le fichier

Fichier : flask_api.py Projet : vnisor/cotl

#!/usr/bin/python
from flask import Flask, request, json
import flask
import happybase
from pyelasticsearch.client import ElasticSearch
import hashlib
hbasehost = 'c0tl.com'
from struct import *
app = Flask(__name__)
es = ElasticSearch('http://*****:*****@app.route('/')
def home():
    return """<html>
  <h2>Welcome to the colt API!</h2>
</html>"""

Exemple #42

0

Afficher le fichier

Fichier : elasticbu.py Projet : zazasa/hltd

class elasticBandBU:

    def __init__(self,es_server_url,runnumber,startTime,runMode=True):
        self.logger = logging.getLogger(self.__class__.__name__)
        self.es_server_url=es_server_url
        self.index_name=conf.elastic_runindex_name
        self.runnumber = str(runnumber)
        self.startTime = startTime
        self.host = os.uname()[1]
        self.stopping=False
        self.threadEvent = threading.Event()
        self.runMode=runMode
        self.settings = {
            "analysis":{
                "analyzer": {
                    "prefix-test-analyzer": {
                        "type": "custom",
                        "tokenizer": "prefix-test-tokenizer"
                    }
                },
                "tokenizer": {
                    "prefix-test-tokenizer": {
                        "type": "path_hierarchy",
                        "delimiter": " "
                    }
                }
             },
            "index":{
                'number_of_shards' : 10,
                'number_of_replicas' : 3
            },
        }

        self.run_mapping = {
            'run' : {
#                '_routing' :{
#                    'required' : True,
#                    'path'     : 'runNumber'
#                },
                '_id' : {
                    'path' : 'runNumber'
                },
                'properties' : {
                    'runNumber':{
                        'type':'integer'
                        },
                    'startTimeRC':{
                        'type':'date'
                            },
                    'stopTimeRC':{
                        'type':'date'
                            },
                    'startTime':{
                        'type':'date'
                            },
                    'endTime':{
                        'type':'date'
                            },
                    'completedTime' : {
                        'type':'date'
                            }
                },
                '_timestamp' : {
                    'enabled' : True,
                    'store'   : 'yes'
                    }
            },
            'microstatelegend' : {

                '_id' : {
                    'path' : 'id'
                },
                '_parent':{'type':'run'},
                'properties' : {
                    'names':{
                        'type':'string'
                        },
                    'id':{
                        'type':'string'
                        }
                    }
            },
            'pathlegend' : {

                '_id' : {
                    'path' : 'id'
                },
                '_parent':{'type':'run'},
                'properties' : {
                    'names':{
                        'type':'string'
                        },
                    'id':{
                        'type':'string'
                        }

                    }
                },
            'boxinfo' : {
                '_id'        :{'path':'id'},#TODO:remove
                'properties' : {
                    'fm_date'       :{'type':'date'},
                    'id'            :{'type':'string'},
                    'broken'        :{'type':'integer'},
                    'used'          :{'type':'integer'},
                    'idles'         :{'type':'integer'},
                    'quarantined'   :{'type':'integer'},
                    'usedDataDir'   :{'type':'integer'},
                    'totalDataDir'  :{'type':'integer'},
                    'usedRamdisk'   :{'type':'integer'},
                    'totalRamdisk'  :{'type':'integer'},
                    'usedOutput'    :{'type':'integer'},
                    'totalOutput'   :{'type':'integer'},
                    'activeRuns'    :{'type':'string'}
                    },
                '_timestamp' : { 
                    'enabled'   : True,
                    'store'     : "yes",
                    "path"      : "fm_date"
                    },
                '_ttl'       : { 'enabled' : True,
                              'default' :  '30d'
                    }
                },

            'boxinfo_last' : {
                '_id'        :{'path':'id'},
                'properties' : {
                    'fm_date'       :{'type':'date'},
                    'id'            :{'type':'string'},
                    'broken'        :{'type':'integer'},
                    'used'          :{'type':'integer'},
                    'idles'         :{'type':'integer'},
                    'quarantined'   :{'type':'integer'},
                    'usedDataDir'   :{'type':'integer'},
                    'totalDataDir'  :{'type':'integer'},
                    'usedRamdisk'   :{'type':'integer'},
                    'totalRamdisk'  :{'type':'integer'},
                    'usedOutput'    :{'type':'integer'},
                    'totalOutput'   :{'type':'integer'},
                    'activeRuns'    :{'type':'string'}
                    },
                '_timestamp' : { 
                    'enabled'   : True,
                    'store'     : "yes",
                    "path"      : "fm_date"
                    }
                },

            'eols' : {
                '_id'        :{'path':'id'},
                '_parent'    :{'type':'run'},
                'properties' : {
                    'fm_date'       :{'type':'date'},
                    'id'            :{'type':'string'},
                    'ls'            :{'type':'integer'},
                    'NEvents'       :{'type':'integer'},
                    'NFiles'        :{'type':'integer'},
                    'TotalEvents'   :{'type':'integer'}
                    },
                '_timestamp' : { 
                    'enabled'   : True,
                    'store'     : "yes",
                    "path"      : "fm_date"
                    },
                },
            'minimerge' : {
                '_id'        :{'path':'id'},
                '_parent'    :{'type':'run'},
                'properties' : {
                    'fm_date'       :{'type':'date'},
                    'id'            :{'type':'string'}, #run+appliance+stream+ls
                    'appliance'     :{'type':'string'},
                    'stream'        :{'type':'string','index' : 'not_analyzed'},
                    'ls'            :{'type':'integer'},
                    'processed'     :{'type':'integer'},
                    'accepted'      :{'type':'integer'},
                    'errorEvents'   :{'type':'integer'},
                    'size'          :{'type':'integer'},
                    }
                }
            }


        connectionAttempts=0
        while True:
            if self.stopping:break
            connectionAttempts+=1
            try:
                self.logger.info('writing to elastic index '+self.index_name)
                ip_url=getURLwithIP(es_server_url)
                self.es = ElasticSearch(es_server_url)
                self.es.create_index(self.index_name, settings={ 'settings': self.settings, 'mappings': self.run_mapping })
                break
            except ElasticHttpError as ex:
                #this is normally fine as the index gets created somewhere across the cluster
                if "IndexAlreadyExistsException" in str(ex):
                    self.logger.info(ex)
                    break
                else:
                    self.logger.error(ex)
                    if runMode and connectionAttempts>100:
                        self.logger.error('elastic (BU): exiting after 100 ElasticHttpError reports from '+ es_server_url)
                        sys.exit(1)
                    elif runMode==False and connectionAttempts>10:
                        self.threadEvent.wait(60)
                    else:
                        self.threadEvent.wait(1)
                    continue

            except (ConnectionError,Timeout) as ex:
                #try to reconnect with different IP from DNS load balancing
                if runMode and connectionAttempts>100:
                   self.logger.error('elastic (BU): exiting after 100 connection attempts to '+ es_server_url)
                   sys.exit(1)
                elif runMode==False and connectionAttempts>10:
                   self.threadEvent.wait(60)
                else:
                   self.threadEvent.wait(1)
                continue
            
        #write run number document
        if runMode == True:
            document = {}
            document['runNumber'] = self.runnumber
            document['startTime'] = startTime
            documents = [document]
            self.index_documents('run',documents)
            #except ElasticHttpError as ex:
            #    self.logger.info(ex)
            #    pass

    def resetURL(url):
        self.es = None
        self.es = ElasticSearch(url)

    def read_line(self,fullpath):
        with open(fullpath,'r') as fp:
            return fp.readline()
    
    def elasticize_modulelegend(self,fullpath):

        self.logger.info(os.path.basename(fullpath))
        stub = self.read_line(fullpath)
        document = {}
        document['_parent']= self.runnumber
        document['id']= "microstatelegend_"+self.runnumber
        document['names']= self.read_line(fullpath)
        documents = [document]
        return self.index_documents('microstatelegend',documents)


    def elasticize_pathlegend(self,fullpath):

        self.logger.info(os.path.basename(fullpath))
        stub = self.read_line(fullpath)
        document = {}
        document['_parent']= self.runnumber
        document['id']= "pathlegend_"+self.runnumber
        document['names']= self.read_line(fullpath)
        documents = [document]
        return self.index_documents('pathlegend',documents)

    def elasticize_runend_time(self,endtime):

        self.logger.info(str(endtime)+" going into buffer")
        document = {}
        document['runNumber'] = self.runnumber
        document['startTime'] = self.startTime
        document['endTime'] = endtime
        documents = [document]
        self.index_documents('run',documents)

    def elasticize_box(self,infile):

        basename = infile.basename
        self.logger.debug(basename)
        try:
            document = infile.data
            #TODO:let dynamic ID
            document['id']= basename + '_' + document['fm_date'].split('.')[0] #TODO:remove
            documents = [document]
        except:
            #in case of malformed box info
            return
        self.index_documents('boxinfo',documents)
        #self.logger.info(str(document))#check that ID is not present...
        #TODO:write unique boxinfo
        #documents[0]['id']=basename
        #self.index_documents('boxinfo_last',documents)

    def elasticize_eols(self,infile):
        basename = infile.basename
        self.logger.info(basename)
        data = infile.data['data']
        data.append(infile.mtime)
        data.append(infile.ls[2:])
        
        values = [int(f) if f.isdigit() else str(f) for f in data]
        keys = ["NEvents","NFiles","TotalEvents","fm_date","ls"]
        document = dict(zip(keys, values))

        document['id'] = infile.name+"_"+os.uname()[1]
        document['_parent']= self.runnumber
        documents = [document]
        self.index_documents('eols',documents)

    def elasticize_minimerge(self,infile):
        basename = infile.basename
        self.logger.info(basename)
        data = infile.data['data']
        data.append(infile.mtime)
        data.append(infile.ls[2:])
        stream=infile.stream
        if stream.startswith("stream"): stream = stream[6:]
        data.append(stream)
        values = [int(f) if str(f).isdigit() else str(f) for f in data]
        keys = ["processed","accepted","errorEvents","fname","size","eolField1","eolField2","fm_date","ls","stream"]
        document = dict(zip(keys, values))
        document['id'] = infile.name
        document['_parent']= self.runnumber
        documents = [document]
        self.index_documents('minimerge',documents)

    def index_documents(self,name,documents):
        attempts=0
        while True:
            attempts+=1
            try:
                self.es.bulk_index(self.index_name,name,documents)
                return True
            except ElasticHttpError as ex:
                if attempts<=1:continue
                self.logger.error('elasticsearch HTTP error. skipping document '+name)
                #self.logger.exception(ex)
                return False
            except (ConnectionError,Timeout) as ex:
                if attempts>100 and self.runMode:
                    raise(ex)
                self.logger.error('elasticsearch connection error. retry.')
                if self.stopping:return False
                time.sleep(0.1)
                ip_url=getURLwithIP(self.es_server_url)
                self.es = ElasticSearch(ip_url)
        return False

Exemple #43

0

Afficher le fichier

#!/bin/env python
import os, sys, time, datetime
import threading
from pyelasticsearch.client import ElasticSearch
import json
from ordereddict import OrderedDict

#es_hosts=['http://fuval-c2a11-02:9200','http://fuval-c2a11-03:9200','http://fuval-c2a11-15:9200']
#es_tribe_hosts=['http://fuval-c2a11-28:9200']

es_hosts = ['http://dvcu-ccsl6-01:9200']
es_tribe_hosts = ['http://dvtu-ccsl6-01:9200']

main_es = ElasticSearch(es_hosts[0])
tribe_es = ElasticSearch(es_tribe_hosts[0])
main_index = 'runindex'
setup = 'daq2val'


class query_maker(threading.Thread):
    def __init__(self, run):
        threading.Thread.__init__(self)
        self.running = True
        self.hostname = os.uname()[1]
        self.ip = {}
        self.runno = run
        self.known_streams = {}
        app_query = {
            "query": {
                "top_children": {
                    "score": "sum",

Exemple #44

0

Afficher le fichier

Fichier : create_index.py Projet : cmsdaq/hltd

  command = sys.argv[1]
  server_url=sys.argv[2]
  index_name=sys.argv[3]
else:
  print "Parameters: command[create,alias,mapping] server url, index.alias name (target index)"
  print "  COMMANDS:"
  print "    create: create index"
  print "    alias: create index *_read and *_write aliases (optional parameter: target index)"
  print "    create missing document mappings for the index"
  sys.exit(1)

if server_url.startswith('http://')==False:
  server_url='http://'+server_url

#connection
es = ElasticSearch(server_url)

#pick mapping
if index_name.startswith('runindex'):
  my_settings = mappings.central_es_settings_runindex
  my_mapping = mappings.central_runindex_mapping
if index_name.startswith('boxinfo'):
  my_settings = mappings.central_es_settings_boxinfo,
  my_mapping = mappings.central_boxinfo_mapping
if index_name.startswith('hltdlogs'):
  my_settings = mappings.central_es_settings_hltlogs
  my_mapping = mappings.central_hltdlogs_mapping

#alias convention
alias_write=index_name+"_write"
alias_read=index_name+"_read"