def __init__(self, site, cloud, nJobs):
    """Keep the constructor arguments and initialise the task buffer.

    site, cloud and nJobs are stored unchanged on the instance as
    double-underscore (name-mangled) attributes.
    """
    # remember the caller-supplied parameters
    self.__site, self.__cloud, self.__nJobs = site, cloud, nJobs
    # initialise the module-level taskBuffer with nDBConnection=1
    taskBuffer.init(
        panda_config.dbhost,
        panda_config.dbpasswd,
        nDBConnection=1,
    )
def run(inFile, v_onlyTA):
    """Load pickled jobs from ``inFile`` and run a Setupper thread on them.

    Args:
        inFile: path of the file that holds the pickled jobs.
        v_onlyTA: forwarded to ``Setupper`` as its ``onlyTA`` keyword.

    Returns:
        None. When the file cannot be opened or unpickled, the error is
        printed and the function returns before any project module is
        imported or the task buffer is initialised.
    """
    # cPickle only exists on Python 2; fall back to the stdlib pickle so the
    # import cannot fail before the error handling below gets a chance to run
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    try:
        # read Jobs from file
        # NOTE(review): unpickling runs code embedded in the file; inFile
        # must come from a trusted producer - confirm against the caller.
        # 'with' guarantees the handle is closed even if pickle.load raises;
        # binary mode is what pickle expects.
        with open(inFile, 'rb') as f:
            jobs = pickle.load(f)
    except Exception:
        errtype, errvalue = sys.exc_info()[:2]
        print("run() : %s %s" % (errtype, errvalue))
        return
    # config (provides dbhost / dbpasswd)
    from config import panda_config
    # initialize cx_Oracle using dummy connection
    from taskbuffer.Initializer import initializer
    initializer.init()
    # instantiate TB
    from taskbuffer.TaskBuffer import taskBuffer
    taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)
    # run Setupper and wait for it to finish
    from dataservice.Setupper import Setupper
    thr = Setupper(taskBuffer, jobs, onlyTA=v_onlyTA)
    thr.start()
    thr.join()
    return
def main(backGround=False):
    """Fork a child that attaches a GenCallbackConsumer to each broker.

    The parent process only waits for the child. The child initialises the
    task buffer and site mapper, resolves the broker alias and, for every
    resolved address, opens a STOMP connection, registers the listener and
    subscribes to the queue. ``backGround`` is not used in this function.
    """
    _logger.debug('starting ...')
    # register signal handler (the alarm enforces overallTimeout)
    for signum in (signal.SIGINT, signal.SIGHUP, signal.SIGTERM, signal.SIGALRM):
        signal.signal(signum, catch_sig)
    signal.alarm(overallTimeout)
    # forking
    child_pid = os.fork()
    if child_pid != 0:
        # parent: watch child process
        os.wait()
        time.sleep(1)
        return
    # child: main loop
    from taskbuffer.TaskBuffer import taskBuffer
    # initialize cx_Oracle using dummy connection
    from taskbuffer.Initializer import initializer
    initializer.init()
    # instantiate TB
    taskBuffer.init(panda_config.dbhost,
                    panda_config.dbpasswd,
                    nDBConnection=1)
    # instantiate sitemapper
    site_mapper = SiteMapper(taskBuffer)
    # ActiveMQ params
    client_id = 'PANDA-' + socket.getfqdn()
    destination = '/queue/Consumer.test1.poc.pocMSG'
    ssl_kwargs = dict(
        use_ssl=True,
        ssl_cert_file='%s/hostcert.pem' % panda_config.certdir,
        ssl_key_file='%s/hostkey.pem' % panda_config.certdir,
    )
    # resolve multiple brokers (last element is the address list)
    broker_addrs = socket.gethostbyname_ex('gridmsg007.cern.ch')[-1]
    # set listener
    for broker in broker_addrs:
        try:
            _logger.debug('setting listener on %s' % broker)
            conn = stomp.Connection(host_and_ports=[(broker, 6162)],
                                    **ssl_kwargs)
            consumer = GenCallbackConsumer(conn, taskBuffer, site_mapper)
            conn.set_listener('GenCallbackConsumer', consumer)
            conn.start()
            conn.connect(headers={'client-id': client_id})
            conn.subscribe(destination=destination, ack='client-individual')
            if not conn.is_connected():
                _logger.error("connection failure to %s" % broker)
        except:
            errtype, errvalue = sys.exc_info()[:2]
            _logger.error("failed to set listener on %s : %s %s" %
                          (broker, errtype, errvalue))
            catch_sig(None, None)
def main(backGround=False):
    """Fork a child that attaches a FileCallbackListener to each broker.

    The parent process only waits for the child. The child checks the user
    certificate (a bad certificate is logged, not fatal), initialises the
    task buffer and site mapper, resolves the broker alias and, for every
    resolved address, opens a STOMP connection, registers the listener and
    subscribes to the queue. Any exception while setting up a broker is
    logged and then handed to ``catch_sig(None, None)``.

    ``backGround`` is not used in this function.
    """
    _logger.debug('starting ...')
    # register signal handler (the alarm enforces overallTimeout)
    signal.signal(signal.SIGINT, catch_sig)
    signal.signal(signal.SIGHUP, catch_sig)
    signal.signal(signal.SIGTERM, catch_sig)
    signal.signal(signal.SIGALRM, catch_sig)
    signal.alarm(overallTimeout)
    # forking
    pid = os.fork()
    if pid != 0:
        # parent: watch child process
        os.wait()
        time.sleep(1)
    else:
        # child: main loop
        from taskbuffer.TaskBuffer import taskBuffer
        # check certificate
        certName = '/data/atlpan/pandasv1_usercert.pem'
        _logger.debug('checking certificate {0}'.format(certName))
        certOK, certMsg = DataServiceUtils.checkCertificate(certName)
        if not certOK:
            _logger.error('bad certificate : {0}'.format(certMsg))
        # initialize cx_Oracle using dummy connection
        from taskbuffer.Initializer import initializer
        initializer.init()
        # instantiate TB
        taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)
        # instantiate sitemapper
        siteMapper = SiteMapper(taskBuffer)
        # ActiveMQ params
        queue = '/queue/Consumer.PANDA.atlas.ddm.siteservices'
        ssl_opts = {'use_ssl': True,
                    'ssl_cert_file': certName,
                    'ssl_key_file': '/data/atlpan/pandasv1_userkey.pem'}
        # resolve multiple brokers (last element is the address list)
        brokerList = socket.gethostbyname_ex('atlasddm-mb.cern.ch')[-1]
        # set listener
        for tmpBroker in brokerList:
            try:
                clientid = 'PANDA-' + socket.getfqdn() + '-' + tmpBroker
                _logger.debug('setting listener %s' % clientid)
                conn = stomp.Connection(host_and_ports=[(tmpBroker, 6162)], **ssl_opts)
                conn.set_listener('FileCallbackListener',
                                  FileCallbackListener(conn, taskBuffer, siteMapper))
                conn.start()
                conn.connect(headers={'client-id': clientid})
                conn.subscribe(destination=queue, ack='client-individual')
                if not conn.is_connected():
                    _logger.error("connection failure to %s" % tmpBroker)
                else:
                    # only report success when the connection really is up
                    _logger.debug('listener %s is up and running' % clientid)
            except Exception:
                errtype, errvalue = sys.exc_info()[:2]
                _logger.error("failed to set listener on %s : %s %s" %
                              (tmpBroker, errtype, errvalue))
                catch_sig(None, None)
def main(backGround=False):
    """Start the GenCallbackConsumer listeners in a forked child process.

    Signal handlers and the overall-timeout alarm are installed first. The
    parent then waits for the child; the child sets up the task buffer and
    site mapper and subscribes one STOMP connection per resolved broker
    address. ``backGround`` is not used in this function.
    """
    _logger.debug('starting ...')
    # register signal handler
    signal.signal(signal.SIGINT, catch_sig)
    signal.signal(signal.SIGHUP, catch_sig)
    signal.signal(signal.SIGTERM, catch_sig)
    signal.signal(signal.SIGALRM, catch_sig)
    signal.alarm(overallTimeout)
    # forking
    forked = os.fork()
    if forked != 0:
        # watch child process
        os.wait()
        time.sleep(1)
        return

    # ---- child process from here on ----
    from taskbuffer.TaskBuffer import taskBuffer
    # initialize cx_Oracle using dummy connection
    from taskbuffer.Initializer import initializer
    initializer.init()
    # instantiate TB
    taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)
    # instantiate sitemapper
    mapper = SiteMapper(taskBuffer)

    # ActiveMQ params
    stomp_client_id = 'PANDA-' + socket.getfqdn()
    queue_name = '/queue/Consumer.test1.poc.pocMSG'
    cert_file = '%s/hostcert.pem' % panda_config.certdir
    key_file = '%s/hostkey.pem' % panda_config.certdir
    connection_opts = {
        'use_ssl': True,
        'ssl_cert_file': cert_file,
        'ssl_key_file': key_file,
    }

    # resolve multiple brokers
    resolved = socket.gethostbyname_ex('gridmsg007.cern.ch')
    addresses = resolved[-1]

    # set listener
    for address in addresses:
        try:
            _logger.debug('setting listener on %s' % address)
            endpoint = [(address, 6162)]
            conn = stomp.Connection(host_and_ports=endpoint, **connection_opts)
            conn.set_listener('GenCallbackConsumer',
                              GenCallbackConsumer(conn, taskBuffer, mapper))
            conn.start()
            conn.connect(headers={'client-id': stomp_client_id})
            conn.subscribe(destination=queue_name, ack='client-individual')
            if not conn.is_connected():
                _logger.error("connection failure to %s" % address)
        except:
            exc_class, exc_obj = sys.exc_info()[:2]
            _logger.error("failed to set listener on %s : %s %s" % (address, exc_class, exc_obj))
            catch_sig(None, None)
def __init__(self):
    """Initialise the thread, the task buffer and fetch both data dumps.

    The NWS and AGIS cost-matrix URLs are taken from panda_config when the
    corresponding attribute exists, otherwise the built-in defaults are
    used. Each dump is retrieved with aux.get_dump during construction.
    """
    threading.Thread.__init__(self)
    taskBuffer.init(panda_config.dbhost,
                    panda_config.dbpasswd,
                    nDBConnection=1)

    # NWS source: configured value wins over the default
    self.NWS_URL = getattr(
        panda_config, 'NWS_URL',
        'http://atlas-adc-netmetrics-lb.cern.ch/metrics/latest.json')
    _logger.debug('Getting NWS dump...')
    self.nws_dump = aux.get_dump(self.NWS_URL)
    _logger.debug('Done')

    # AGIS cost matrix source: configured value wins over the default
    self.AGIS_URL_CM = getattr(
        panda_config, 'AGIS_URL_CM',
        'http://atlas-agis-api.cern.ch/request/site/query/list_links/?json')
    _logger.debug('Getting AGIS cost matrix dump...')
    self.agis_cm_dump = aux.get_dump(self.AGIS_URL_CM)
    _logger.debug('Done')
def __init__(self):
    """Set up the thread and load the NWS and AGIS cost-matrix dumps.

    panda_config.NWS_URL / panda_config.AGIS_URL_CM override the default
    endpoints when present. The dumps returned by aux.get_dump are stored
    on the instance as nws_dump and agis_cm_dump.
    """
    default_nws_url = 'http://atlas-adc-netmetrics-lb.cern.ch/metrics/latest.json'
    default_agis_cm_url = 'http://atlas-agis-api.cern.ch/request/site/query/list_links/?json'

    threading.Thread.__init__(self)
    taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)

    # network weather service metrics
    self.NWS_URL = panda_config.NWS_URL if hasattr(panda_config, 'NWS_URL') else default_nws_url
    _logger.debug('Getting NWS dump...')
    self.nws_dump = aux.get_dump(self.NWS_URL)
    _logger.debug('Done')

    # AGIS cost matrix
    self.AGIS_URL_CM = panda_config.AGIS_URL_CM if hasattr(panda_config, 'AGIS_URL_CM') else default_agis_cm_url
    _logger.debug('Getting AGIS cost matrix dump...')
    self.agis_cm_dump = aux.get_dump(self.AGIS_URL_CM)
    _logger.debug('Done')
pid = items[1] # start time timeM = re.search('(\S+\s+\d+ \d+:\d+:\d+ \d+)', line) startTime = datetime.datetime( *time.strptime(timeM.group(1), '%b %d %H:%M:%S %Y')[:6]) # kill old process if startTime < timeLimit: tmpLog.debug("old process : %s %s" % (pid, startTime)) tmpLog.debug(line) commands.getoutput('kill -9 %s' % pid) except: type, value, traceBack = sys.exc_info() tmpLog.error("kill process : %s %s" % (type, value)) # instantiate TB taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1) # instantiate sitemapper aSiteMapper = SiteMapper(taskBuffer) # delete tmpLog.debug("Del session") status, retSel = taskBuffer.querySQLS( "SELECT MAX(PandaID) FROM ATLAS_PANDA.jobsDefined4", {}) if retSel != None: try: maxID = retSel[0][0] tmpLog.debug("maxID : %s" % maxID) if maxID != None: varMap = {} varMap[':maxID'] = maxID
- jobs that don't belong to any task (usually HammerCloud test jobs) - analysis tasks, since there has not been any unification yet """ import datetime import sys from elasticsearch import Elasticsearch from pandalogger.PandaLogger import PandaLogger _logger = PandaLogger().getLogger('frontier_retagging') from config import panda_config from taskbuffer.TaskBuffer import taskBuffer taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1) def get_frontier_failure_count_by_task(): """ retrieve failure count by task from Elastic Search """ es_host = 'atlas-kibana.mwt2.org' es_port = 9200 es_index = 'frontier' # es_index = 'frontier-%d-%02d' % (ct.year, ct.month) # prepare time window for query n_hours = 1 ct = datetime.datetime.utcnow() st = ct - datetime.timedelta(hours=n_hours) current_time = ct.strftime('%Y%m%dT%H%M%S.%f')[:-3] + 'Z'
""" import datetime import traceback import types # config file from config import panda_config # initialize cx_Oracle using dummy connection from taskbuffer.Initializer import initializer initializer.init() # initialzie TaskBuffer from taskbuffer.TaskBuffer import taskBuffer taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, panda_config.nDBConnection, True) # initialize JobDispatcher from jobdispatcher.JobDispatcher import jobDispatcher if panda_config.nDBConnection != 0: jobDispatcher.init(taskBuffer) # initialize DataService from dataservice.DataService import dataService if panda_config.nDBConnection != 0: dataService.init(taskBuffer) # initialize UserIF from userinterface.UserIF import userIF if panda_config.nDBConnection != 0: userIF.init(taskBuffer)
def main(backGround=False):
    """Fork a child that keeps a FileCallbackListener attached to each broker.

    The parent process only waits for the child. The child checks the user
    certificate (a bad certificate is logged, not fatal), initialises the
    task buffer and site mapper, creates one STOMP connection per resolved
    broker address and then loops forever, (re)connecting and subscribing
    any connection that is not connected, sleeping 5 seconds between passes.
    Any exception is logged and then handed to ``catch_sig(None, None)``.

    Each connection is stored together with its own client-id and broker
    address, so reconnects and error messages use the values that belong to
    that connection rather than whatever the setup loop saw last.

    ``backGround`` is not used in this function.
    """
    _logger.debug('starting ...')
    # register signal handler (the alarm enforces overallTimeout)
    signal.signal(signal.SIGINT, catch_sig)
    signal.signal(signal.SIGHUP, catch_sig)
    signal.signal(signal.SIGTERM, catch_sig)
    signal.signal(signal.SIGALRM, catch_sig)
    signal.alarm(overallTimeout)
    # forking
    pid = os.fork()
    if pid != 0:
        # parent: watch child process
        os.wait()
        time.sleep(1)
    else:
        # child: main loop
        from taskbuffer.TaskBuffer import taskBuffer
        # check certificate
        certName = '%s/pandasv1_usercert.pem' % panda_config.certdir
        keyName = '%s/pandasv1_userkey.pem' % panda_config.certdir
        _logger.debug('checking certificate {0}'.format(certName))
        certOK, certMsg = DataServiceUtils.checkCertificate(certName)
        if not certOK:
            _logger.error('bad certificate : {0}'.format(certMsg))
        # initialize cx_Oracle using dummy connection
        from taskbuffer.Initializer import initializer
        initializer.init()
        # instantiate TB
        taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)
        # instantiate sitemapper
        siteMapper = SiteMapper(taskBuffer)
        # ActiveMQ params
        queue = '/queue/Consumer.PANDA.atlas.ddm.siteservices'
        ssl_opts = {
            'use_ssl': True,
            'ssl_version': ssl.PROTOCOL_TLSv1,
            'ssl_cert_file': certName,
            'ssl_key_file': keyName
        }
        # resolve multiple brokers (last element is the address list)
        brokerList = socket.gethostbyname_ex('atlas-mb.cern.ch')[-1]
        # the subscription id does not depend on the broker
        subscription_id = 'panda-server-consumer-' + socket.getfqdn()
        # create one connection per broker, remembering its id and address
        connList = []
        for tmpBroker in brokerList:
            try:
                clientid = 'PANDA-' + socket.getfqdn() + '-' + tmpBroker
                _logger.debug('setting listener %s' % clientid)
                conn = stomp.Connection(host_and_ports=[(tmpBroker, 61023)], **ssl_opts)
                connList.append((conn, clientid, tmpBroker))
            except Exception:
                errtype, errvalue = sys.exc_info()[:2]
                _logger.error("failed to connect to %s : %s %s" %
                              (tmpBroker, errtype, errvalue))
                catch_sig(None, None)
        # keep every listener attached
        while True:
            for conn, clientid, tmpBroker in connList:
                try:
                    if not conn.is_connected():
                        conn.set_listener(
                            'FileCallbackListener',
                            FileCallbackListener(conn, taskBuffer, siteMapper,
                                                 subscription_id))
                        conn.start()
                        conn.connect(headers={'client-id': clientid})
                        conn.subscribe(destination=queue, id=subscription_id,
                                       ack='client-individual')
                        _logger.debug('listener %s is up and running' % clientid)
                except Exception:
                    errtype, errvalue = sys.exc_info()[:2]
                    _logger.error("failed to set listener on %s : %s %s" %
                                  (tmpBroker, errtype, errvalue))
                    catch_sig(None, None)
            time.sleep(5)
def main(backGround=False):
    """Fork a child that keeps a DatasetCallbackListener attached to each broker.

    The parent process only waits for the child. The child checks the user
    certificate (a bad certificate is logged, not fatal), initialises the
    task buffer and site mapper, creates one STOMP connection per resolved
    broker address and then loops forever, (re)connecting and subscribing
    any connection that is not connected, sleeping 5 seconds between passes.
    Any exception is logged and then handed to ``catch_sig(None, None)``.

    Each connection is stored together with its own client-id and broker
    address, so reconnects and error messages use the values that belong to
    that connection rather than whatever the setup loop saw last.

    ``backGround`` is not used in this function.
    """
    _logger.debug('starting ...')
    # register signal handler (the alarm enforces overallTimeout)
    signal.signal(signal.SIGINT, catch_sig)
    signal.signal(signal.SIGHUP, catch_sig)
    signal.signal(signal.SIGTERM, catch_sig)
    signal.signal(signal.SIGALRM, catch_sig)
    signal.alarm(overallTimeout)
    # forking
    pid = os.fork()
    if pid != 0:
        # parent: watch child process
        os.wait()
        time.sleep(1)
    else:
        # child: main loop
        from taskbuffer.TaskBuffer import taskBuffer
        # check certificate
        certName = '/data/atlpan/pandasv1_usercert.pem'
        _logger.debug('checking certificate {0}'.format(certName))
        certOK, certMsg = DataServiceUtils.checkCertificate(certName)
        if not certOK:
            _logger.error('bad certificate : {0}'.format(certMsg))
        # initialize cx_Oracle using dummy connection
        from taskbuffer.Initializer import initializer
        initializer.init()
        # instantiate TB
        taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)
        # instantiate sitemapper
        siteMapper = SiteMapper(taskBuffer)
        # ActiveMQ params
        queue = '/queue/Consumer.panda.rucio.events'
        ssl_opts = {'use_ssl': True,
                    'ssl_version': ssl.PROTOCOL_TLSv1,
                    'ssl_cert_file': certName,
                    'ssl_key_file': '/data/atlpan/pandasv1_userkey.pem'}
        # resolve multiple brokers (last element is the address list)
        brokerList = socket.gethostbyname_ex('atlas-mb.cern.ch')[-1]
        # the subscription id is the same for every broker
        subscription_id = 'panda-server-consumer'
        # create one connection per broker, remembering its id and address
        connList = []
        for tmpBroker in brokerList:
            try:
                clientid = 'PANDA-' + socket.getfqdn() + '-' + tmpBroker
                _logger.debug('setting listener %s to broker %s' % (clientid, tmpBroker))
                conn = stomp.Connection(host_and_ports=[(tmpBroker, 61023)], **ssl_opts)
                connList.append((conn, clientid, tmpBroker))
            except Exception:
                errtype, errvalue = sys.exc_info()[:2]
                _logger.error("failed to connect to %s : %s %s" %
                              (tmpBroker, errtype, errvalue))
                catch_sig(None, None)
        # keep every listener attached
        while True:
            for conn, clientid, tmpBroker in connList:
                try:
                    if not conn.is_connected():
                        conn.set_listener(
                            'DatasetCallbackListener',
                            DatasetCallbackListener(conn, taskBuffer, siteMapper,
                                                    subscription_id))
                        conn.start()
                        conn.connect(headers={'client-id': clientid})
                        conn.subscribe(destination=queue, id=subscription_id,
                                       ack='auto')
                        _logger.debug('listener %s is up and running' % clientid)
                except Exception:
                    errtype, errvalue = sys.exc_info()[:2]
                    _logger.error("failed to set listener on %s : %s %s" %
                                  (tmpBroker, errtype, errvalue))
                    catch_sig(None, None)
            time.sleep(5)
entry point """ import datetime # config file from config import panda_config # initialize cx_Oracle using dummy connection from taskbuffer.Initializer import initializer initializer.init() # initialzie TaskBuffer from taskbuffer.TaskBuffer import taskBuffer taskBuffer.init(panda_config.dbhost,panda_config.dbpasswd,panda_config.nDBConnection,True) # initialize JobDispatcher from jobdispatcher.JobDispatcher import jobDispatcher if panda_config.nDBConnection != 0: jobDispatcher.init(taskBuffer) # initialize DataService from dataservice.DataService import dataService if panda_config.nDBConnection != 0: dataService.init(taskBuffer) # initialize UserIF from userinterface.UserIF import userIF if panda_config.nDBConnection != 0: userIF.init(taskBuffer)
job.homepackage='JobTransforms-11-00-01-01' job.transformation='share/rome.g4sim.standard.trf' job.jobParameters='%s %s 1 2 14268' % (lfnI,lfnO) jobs.append(job) self.taskbuffer.storeJobs(jobs,None) time.sleep(self.interval) from taskbuffer.TaskBuffer import taskBuffer from jobdispatcher.JobDispatcher import jobDispatcher from userinterface.UserIF import userIF import getpass passwd = getpass.getpass() taskBuffer.init('adbpro.usatlas.bnl.gov',passwd,nDBConnection=3) jobDispatcher.init(taskBuffer) userIF.init(taskBuffer) jobDefID = int(time.time()) % 10000 thr1 = TestThread(taskBuffer,4,jobDefID,"myhost") thr2 = TestThread(taskBuffer,3,jobDefID+1,"testsite") thr1.start() #thr2.start() from jobdispatcher.JobDispatcher import getJob,updateJob from userinterface.UserIF import submitJobs,getJobStatus,queryPandaIDs