Example #1
                   'reloadInput','enableJumboJobs']

# import error codes
import taskbuffer.ErrorCode


# FastCGI/WSGI entry
if panda_config.useFastCGI or panda_config.useWSGI:

    import os
    import cgi
    import sys
    from pandalogger.PandaLogger import PandaLogger

    # logger
    _logger = PandaLogger().getLogger('Entry')

    # dummy request object
    class DummyReq:
        def __init__(self,env,):
            # environ
            self.subprocess_env = env
            # header
            self.headers_in = {}
            # content-length
            if self.subprocess_env.has_key('CONTENT_LENGTH'):
                self.headers_in["content-length"] = self.subprocess_env['CONTENT_LENGTH']

        # get remote host    
        def get_remote_host(self):
            if self.subprocess_env.has_key('REMOTE_HOST'):
                return self.subprocess_env['REMOTE_HOST']
            # (assumed continuation; the source is truncated here)
            return ''

def _getPFNFromLFC(lfns,dq2url,guids,storageName,scopeList=[],tmpLog=None):
    if tmpLog == None:
        tmpLog = LogWrapper(_log,logPrefix)
    tmpLog.debug('_getPFNFromLFC %s %s / %s LFNs:%s %s' % (dq2url,str(storageName),
                                                         len(lfns),str(lfns[:3]),str(scopeList[:3])))
    outStr = ''
    # check parameter
    if guids == [] or storageName == [] or (len(lfns) != len(guids)):
        tmpLog.debug('_getPFNFromLFC done with empty list')
        return outStr
    # check scopeList
    if not scopeList in [None,[]] and len(lfns) != len(scopeList):
        tmpLog.warning('_getPFNFromLFC wrong scopeList %s %s %s %s' % (dq2url,str(storageName),
                                                                       str(lfns),str(scopeList)))
        tmpLog.error('_getPFNFromLFC failed')
        return outStr
    # loop over all LFNs
    iLFN = 0
    nLFN = 1000
    strFiles = ''    
    outStr = ''
    for iLFN in range(len(lfns)):
        if scopeList != []:
            strFiles  += '%s %s %s\n' % (lfns[iLFN],guids[iLFN],scopeList[iLFN]) 
        else:
            strFiles  += '%s %s\n' % (lfns[iLFN],guids[iLFN]) 
        # bulk operation
        if (iLFN+1) % nLFN == 0 or (iLFN+1) >= len(lfns):
            # write to file
            inFileName = '%s/lfcin.%s'  % (panda_config.logdir,commands.getoutput('uuidgen'))
            ifile = open(inFileName,'w')
            ifile.write(strFiles)
            ifile.close()
            # construct commands
            strStorage = ''
            for storage in storageName:
                strStorage += '%s,' % storage
            strStorage = strStorage[:-1]
            com = 'cd %s > /dev/null 2>&1; export HOME=%s; ' % (panda_config.home_dir_cwd,panda_config.home_dir_cwd)            
            com+= 'unset LD_LIBRARY_PATH; unset PYTHONPATH; export PATH=/usr/local/bin:/bin:/usr/bin; '
            com+= 'source %s; %s/python -Wignore %s/LFCclient.py -f %s -l %s -s %s' % \
                  (panda_config.glite_source,panda_config.native_python32,panda_config.lfcClient_dir,
                   inFileName,dq2url,strStorage)
            tmpLog.debug(com)
            # execute
            status,output = commands.getstatusoutput(com)
            tmpLog.debug(status)
            if status == 0:
                outStr += output
            else:
                tmpLog.error("_getPFNFromLFC : %s %s %s" % (dq2url,status,output))
                # send message to logger
                try:
                    # make message
                    message = 'LFC access : %s %s %s' % (dq2url,status,output)
                    # get logger
                    _pandaLogger = PandaLogger()
                    _pandaLogger.lock()
                    _pandaLogger.setParams({'Type':'broker_util'})
                    logger = _pandaLogger.getHttpLogger(panda_config.loggername)
                    # add message
                    logger.error(message)
                    # release HTTP handler
                    _pandaLogger.release()
                except:
                    pass
                tmpLog.error('_getPFNFromLFC failed')
                return status
            # reset
            strFiles = ''
    tmpLog.debug('_getPFNFromLFC done')
    # return
    return outStr
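
# a minimal usage sketch; the LFN/GUID/storage values below are hypothetical
if __name__ == '__main__':
    print _getPFNFromLFC(['f1.root', 'f2.root'],
                         'lfc://lfc.example.org:/grid/atlas',
                         ['guid-1', 'guid-2'],
                         ['srm://se.example.org'],
                         scopeList=['mc15_13TeV', 'mc15_13TeV'])
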
from brokerage.SiteMapper import SiteMapper
from dataservice.Adder import Adder
from dataservice.Finisher import Finisher
from dataservice.MailUtils import MailUtils
from taskbuffer import ProcessGroups
import brokerage.broker_util
import brokerage.broker
import taskbuffer.ErrorCode
import dataservice.DDM

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('runRebro')

_logger.debug("===================== start =====================")

# memory checker
def _memoryCheck(str):
    try:
        proc_status = '/proc/%d/status' % os.getpid()
        procfile = open(proc_status)
        name   = ""
        vmSize = ""
        vmRSS  = ""
        # extract Name,VmSize,VmRSS
        for line in procfile:
            if line.startswith("Name:"):
                name = line.split()[-1]
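            # (assumed continuation; the source is truncated here) VmSize and
            # VmRSS are extracted the same way, then logged
            if line.startswith("VmSize:"):
                vmSize = line.split()[-2]
            if line.startswith("VmRSS:"):
                vmRSS = line.split()[-2]
        procfile.close()
        _logger.debug('MemCheck - %s Name=%s VmSize=%s VmRSS=%s' % (str, name, vmSize, vmRSS))
    except Exception:
        pass
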
import socket
import signal
import random
import threading
import cPickle as pickle

import OraDBProxy as DBProxy

from config import panda_config
from JobSpec import JobSpec
from FileSpec import FileSpec
from DatasetSpec import DatasetSpec
from pandalogger.PandaLogger import PandaLogger

# logger
_logger = PandaLogger().getLogger('ConBridge')


# exception for normal termination
class HarmlessEx(Exception):
    pass


# terminate child process by itself when master has gone
class Terminator(threading.Thread):

    # constructor
    def __init__(self, consock):
        threading.Thread.__init__(self)
        self.consock = consock
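
    # main (assumed sketch; the source is truncated here): block on the socket
    # to the master process and terminate this child once the master goes away
    def run(self):
        import os
        try:
            # recv unblocks once the master closes its end of the connection
            self.consock.recv(1)
        except Exception:
            pass
        os._exit(0)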
Example #5
import re
from config import panda_config

from taskbuffer.TaskBuffer import taskBuffer

from pandalogger.PandaLogger import PandaLogger
from pandalogger.LogWrapper import LogWrapper

import panda_proxy_cache

# logger
_logger = PandaLogger().getLogger('panda_activeusers_query')
tmpLog = LogWrapper(_logger)

if __name__ == '__main__':

    tmpLog.debug("================= start ==================")
    # instantiate TB
    taskBuffer.init(panda_config.dbhost,
                    panda_config.dbpasswd,
                    nDBConnection=1)

    # instantiate MyProxy I/F
    my_proxy_interface_instance = panda_proxy_cache.MyProxyInterface()

    # roles
    if hasattr(panda_config, 'proxy_cache_roles'):
        roles = panda_config.proxy_cache_roles.split(',')
    else:
        roles = [
            'atlas', 'atlas:/atlas/Role=production', 'atlas:/atlas/Role=pilot'
Example #6
master handler for DDM

'''

import re
import threading

from Waker     import Waker
from Finisher  import Finisher
from Activator import Activator

from pandalogger.PandaLogger import PandaLogger
from pandalogger.LogWrapper import LogWrapper

# logger
_logger = PandaLogger().getLogger('DDMHandler')


class DDMHandler (threading.Thread):
    # constructor
    def __init__(self,taskBuffer,vuid,site=None,dataset=None,scope=None):
        threading.Thread.__init__(self)
        self.vuid       = vuid
        self.taskBuffer = taskBuffer
        self.site       = site
        self.scope      = scope
        self.dataset    = dataset


    # main
    def run(self):
import socket
import datetime
import commands
import traceback
from threading import Lock
from config import panda_config
from dataservice.Adder import Adder
from pandalogger.PandaLogger import PandaLogger
from pandalogger.LogWrapper import LogWrapper
import DispatcherUtils
from taskbuffer import EventServiceUtils
from taskbuffer import retryModule
from brokerage.SiteMapper import SiteMapper

# logger
_logger = PandaLogger().getLogger('JobDispatcher')
_pilotReqLogger = PandaLogger().getLogger('PilotRequests')


# a wrapper to install timeout into a method
class _TimedMethod:
    def __init__(self, method, timeout):
        self.method = method
        self.timeout = timeout
        self.result = Protocol.TimeOutToken

    # method emulation
    def __call__(self, *var):
        self.result = apply(self.method, var)

    # run
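    # (assumed continuation; the body is truncated in the source) run the
    # wrapped method in a worker thread and join with the timeout, so a hung
    # call cannot block the dispatcher indefinitely
    def run(self, *var):
        from threading import Thread
        thr = Thread(target=self, args=var)
        thr.start()
        thr.join(self.timeout)
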
class PilotStreaming:
    def __init__(self):
        self._logger = PandaLogger().getLogger('PilotStreaming')
        return

    def run(self):
        """
        Gets and iterates over ups queues, deciding the job requirements and sending these to Harvester
        via the command interface
        :return:
        """

        # timing
        time_start = time.time()
        self._logger.debug('Start.')

        # get unified pilot streaming (ups) queues
        ups_queues = taskBuffer.ups_get_queues()
        self._logger.debug('UPS queues: {0}'.format(ups_queues))

        # get worker stats
        worker_stats = taskBuffer.ups_load_worker_stats()

        for ups_queue in ups_queues:
            # get the worker and job stats for the queue
            try:
                tmp_worker_stats = worker_stats[ups_queue]
                self._logger.debug('worker_stats for queue {0}: {1}'.format(ups_queue, tmp_worker_stats))
                # tmp_job_stats = job_stats[ups_queue]
            except KeyError:
                # skip queue if no data available
                self._logger.debug('No worker stats for queue {0}'.format(ups_queue))
                continue

            new_workers_per_harvester = taskBuffer.ups_new_worker_distribution(ups_queue, tmp_worker_stats)
            self._logger.info('queue: {0}, results: {1}'.format(ups_queue, new_workers_per_harvester))

            # variables for the harvester command
            command = '{0}:{1}'.format('SET_N_WORKERS', ups_queue)
            status = 'new'
            ack_requested = False
            lock_interval = None
            com_interval = None

            for harvester_id in new_workers_per_harvester:
                params = new_workers_per_harvester[harvester_id]
                taskBuffer.commandToHarvester(harvester_id, command, ack_requested, status,
                                              lock_interval, com_interval, params)

        # timing
        time_stop = time.time()
        self._logger.debug('Done. Pilot streaming took: {0} s'.format(time_stop - time_start))

        return
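
# a minimal driver sketch; assumes the module-level imports (time, taskBuffer)
# that were truncated out of this example
if __name__ == '__main__':
    PilotStreaming().run()
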
from brokerage.SiteMapper import SiteMapper
from dataservice.Adder import Adder
from dataservice.Finisher import Finisher
from dataservice.MailUtils import MailUtils
from taskbuffer import ProcessGroups
import brokerage.broker_util
import brokerage.broker
import taskbuffer.ErrorCode
import dataservice.DDM

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('backupJobArch')

_logger.debug("===================== start =====================")

# memory checker
def _memoryCheck(str):
    try:
        proc_status = '/proc/%d/status' % os.getpid()
        procfile = open(proc_status)
        name   = ""
        vmSize = ""
        vmRSS  = ""
        # extract Name,VmSize,VmRSS
        for line in procfile:
            if line.startswith("Name:"):
                name = line.split()[-1]
import os
import re
import sys
import datetime
from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger
from brokerage.SiteMapper import SiteMapper

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('prioryMassage')

_logger.debug("================= start ==================")

# instantiate TB
taskBuffer.init(panda_config.dbhost,panda_config.dbpasswd,nDBConnection=1)

# instantiate sitemapper
siteMapper = SiteMapper(taskBuffer)

# get usage breakdown
usageBreakDownPerUser = {}
usageBreakDownPerSite = {}
workingGroupList = []
for table in ['ATLAS_PANDA.jobsActive4','ATLAS_PANDA.jobsArchived4']:
	varMap = {}
	varMap[':prodSourceLabel'] = 'user'
	if table == 'ATLAS_PANDA.jobsActive4':
Example #12
import os
import re
import sys
from ftplib import FTP
from pandalogger.PandaLogger import PandaLogger

# supported architectures
targetArchs = ['Linux-slc5-gcc4.3.tar.gz','Linux-slc5_amd64-gcc4.3.tar.gz']

# destination dir
destDir = '/data/atlpan/srv/var/appdir'

# logger
_logger = PandaLogger().getLogger('copyROOT')

_logger.debug("===================== start =====================")

try:
    # login to root repository
    ftp = FTP('root.cern.ch')
    output = ftp.login()
    _logger.debug(output)
    output = ftp.cwd('root')
    _logger.debug(output)    
    # get list
    flist = ftp.nlst()
    # loop over all files
    for tmpFile in flist:
        # skip RC
        if re.search('-rc\d\.',tmpFile) != None:
            continue
from brokerage.SiteMapper import SiteMapper
from dataservice.Adder import Adder
from dataservice.Finisher import Finisher
from dataservice.MailUtils import MailUtils
from taskbuffer import ProcessGroups
import brokerage.broker_util
import brokerage.broker
import taskbuffer.ErrorCode
import dataservice.DDM

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('deleteJobs')

_logger.debug("===================== start =====================")

# memory checker
def _memoryCheck(str):
    try:
        proc_status = '/proc/%d/status' % os.getpid()
        procfile = open(proc_status)
        name   = ""
        vmSize = ""
        vmRSS  = ""
        # extract Name,VmSize,VmRSS
        for line in procfile:
            if line.startswith("Name:"):
                name = line.split()[-1]
import commands
import threading

from config import panda_config

# initialize cx_Oracle using dummy connection
from taskbuffer.Initializer import initializer
initializer.init()

from dataservice.Merger import Merger
from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger


# logger
_logger = PandaLogger().getLogger('runMerger')

_logger.debug("================= start ==================")

# overall timeout value
overallTimeout = 60

# kill old process
try:
    # time limit
    timeLimit = datetime.datetime.utcnow() - datetime.timedelta(minutes=overallTimeout)
    # get process list
    scriptName = sys.argv[0]
    out = commands.getoutput('env TZ=UTC ps axo user,pid,lstart,args | grep %s' % scriptName)
    for line in out.split('\n'):
        items = line.split()
Example #15
import os
import time
import datetime
import commands
import jobscheduler.Site
import userinterface.Client as Client
from dataservice.DDM import ddm
from taskbuffer.DBProxy import DBProxy
from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger
from jobdispatcher.Watcher import Watcher

# logger
_logger = PandaLogger().getLogger('closeDS')

# password
from config import panda_config
passwd = panda_config.dbpasswd

# instantiate DB proxies
proxyS = DBProxy()
proxyS.connect(panda_config.dbhost, panda_config.dbpasswd, panda_config.dbuser,
               panda_config.dbname)

# time limit for dataset closing
timeLimit = datetime.datetime.utcnow() - datetime.timedelta(days=7)

# close datasets
while True:
    sql = "SELECT vuid,name,modificationdate FROM Datasets " + \
          "WHERE type='output' AND (status='running' OR status='created' OR status='defined') " + \
from dataservice.Finisher import Finisher
from dataservice.MailUtils import MailUtils
from dataservice import DataServiceUtils
from dataservice.Closer import Closer
from taskbuffer import ProcessGroups
import brokerage.broker_util
import brokerage.broker
import taskbuffer.ErrorCode
import dataservice.DDM

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('datasetManager')

_logger.debug("===================== start =====================")

# use native DQ2
ddm.useDirectDQ2()

# memory checker
def _memoryCheck(str):
    try:
        proc_status = '/proc/%d/status' % os.getpid()
        procfile = open(proc_status)
        name   = ""
        vmSize = ""
        vmRSS  = ""
        # extract Name,VmSize,VmRSS
Example #17
import sys
from config import panda_config

# initialize cx_Oracle using dummy connection
from taskbuffer.Initializer import initializer

initializer.init()

from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger

# logger
_logger = PandaLogger().getLogger('boostUser')
_logger.debug("================= start ==================")

# instantiate TB
taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)

user = sys.stdin.read()
user = user[:-1]

sql = "UPDATE atlas_panda.%s set currentPriority=:prio where prodUserName=:uname and prodSourceLabel IN (:label1,:label2) and currentPriority<:prio"
varMap = {}
varMap[':prio'] = 4000
varMap[':uname'] = user
varMap[':label1'] = 'user'
varMap[':label2'] = 'panda'
for table in ('jobsactive4', 'jobsdefined4'):
    _logger.debug((sql % table) + str(varMap))
    ret = taskBuffer.querySQLS(sql % table, varMap)
    _logger.debug('ret -> %s' % str(ret))
Example #18
import os
import time
import datetime
import commands
import jobscheduler.Site
import userinterface.Client as Client
from dataservice.DDM import ddm
from taskbuffer.DBProxy import DBProxy
from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger
from jobdispatcher.Watcher import Watcher

# logger
_logger = PandaLogger().getLogger('closeDS')

# password
from config import panda_config
passwd = panda_config.dbpasswd

# instantiate DB proxies
proxyS = DBProxy()
proxyS.connect(panda_config.dbhost,panda_config.dbpasswd,panda_config.dbuser,panda_config.dbname)

# time limit for dataset closing
timeLimit = datetime.datetime.utcnow() - datetime.timedelta(days=7)

# close datasets
while True:
    sql = "SELECT vuid,name,modificationdate FROM Datasets " + \
          "WHERE type='output' AND (status='running' OR status='created' OR status='defined') " + \
          "AND modificationdate<'%s' AND name REGEXP '_sub[[:digit:]]+$'"
Example #19
import re
import zlib
import uuid
import time
import socket
import struct
import datetime
import jobdispatcher.Protocol as Protocol
import ErrorCode
from userinterface import Client
from config import panda_config

from pandalogger.PandaLogger import PandaLogger
from pandalogger.LogWrapper import LogWrapper

# logger
_logger = PandaLogger().getLogger('Utils')


# check if server is alive
def isAlive(req):
    return "alive=yes"


# extract name from DN
def cleanUserID(id):
    try:
        up = re.compile('/(DC|O|OU|C|L)=[^\/]+')
        username = up.sub('', id)
        up2 = re.compile('/CN=[0-9]+')
        username = up2.sub('', username)
        up3 = re.compile(' [0-9]+')
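        # (assumed continuation; the source is truncated here) strip the
        # numeric suffix and return the cleaned-up name
        username = up3.sub('', username)
        return username.strip()
    except Exception:
        return id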
Example #20
import random
import datetime
import commands
import threading
from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger
from dataservice.AdderGen import AdderGen
from brokerage.SiteMapper import SiteMapper
from pandautils import PandaUtils

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('add')

_logger.debug("===================== start =====================")

# overall timeout value
overallTimeout = 20

# current minute
currentMinute = datetime.datetime.utcnow().minute

# kill old process
try:
    # time limit
    timeLimit = datetime.datetime.utcnow() - datetime.timedelta(minutes=overallTimeout)
    # get process list
    scriptName = sys.argv[0]
Example #21
import threading
import traceback
import aux
from aux import *
from datetime import datetime, timedelta

from config import panda_config
from pandalogger.PandaLogger import PandaLogger
import db_interface as dbif
from taskbuffer.TaskBuffer import taskBuffer

_logger = PandaLogger().getLogger('configurator')
_session = dbif.get_session()

# Definitions of roles
WRITE_LAN = 'write_lan'
READ_LAN = 'read_lan'

class Configurator(threading.Thread):

    def __init__(self):
        threading.Thread.__init__(self)

        if hasattr(panda_config, 'AGIS_URL_SITES'):
            self.AGIS_URL_SITES = panda_config.AGIS_URL_SITES
        else:
            self.AGIS_URL_SITES = 'http://atlas-agis-api.cern.ch/request/site/query/?json&vo_name=atlas&state=ACTIVE'
        _logger.debug('Getting site dump...')
        self.site_dump = aux.get_dump(self.AGIS_URL_SITES)
        _logger.debug('Done')
        self.site_endpoint_dict = self.get_site_endpoint_dictionary()
from pandaserver.taskbuffer.OraDBProxy import DBProxy
import socket
from config import panda_config
import time
import urlparse
from pandalogger.PandaLogger import PandaLogger
_logger = PandaLogger().getLogger('testGetCriteriaGlobalShares')
from testutils import sendCommand

def retrieveJob(site):
    function = "getJob"
    node = {}
    node['siteName'] = site
    node['mem'] = 1000
    node['node'] = socket.getfqdn()

    data = sendCommand(function, node, _logger)
    jobD = urlparse.parse_qs(data)  # jobD indicates it's a job in dictionary format, not a JobSpec object
    return jobD


if __name__ == "__main__":
    proxyS = DBProxy()
    proxyS.connect(panda_config.dbhost,panda_config.dbpasswd,panda_config.dbuser,panda_config.dbname)

    #proxyS.getCriteriaForGlobalShares('BNL-OSG')

    site = 'CERN-PROD'

    DIRECT = 'direct'
    WEB = 'web'
Example #23
#PanDA server libraries
from config import panda_config
from pandalogger.PandaLogger import PandaLogger

#Configurator libraries
from models import Site, PandaSite, DdmEndpoint, Schedconfig, Jobsactive4, SiteStats

#Read connection parameters
__host = panda_config.dbhost
__user = panda_config.dbuser
__passwd = panda_config.dbpasswd
__dbname = panda_config.dbname

#Instantiate logger
_logger = PandaLogger().getLogger('configurator_dbif')

#Log the SQL produced by SQLAlchemy
__echo = True

#Create the SQLAlchemy engine
try:
    __engine = sqlalchemy.create_engine("oracle://%s:%s@%s"%(__user, __passwd, __host), 
                                         echo=__echo)
except exc.SQLAlchemyError:
    _logger.critical("Could not load the DB engine: %s"%sys.exc_info())
    raise


def get_session():
    return sessionmaker(bind=__engine)()
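
# a minimal usage sketch: open a session and query one of the mapped models
session = get_session()
sites = session.query(Site).all()
session.close()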
Example #24
import datetime
import commands
import threading

from config import panda_config

# initialize cx_Oracle using dummy connection
from taskbuffer.Initializer import initializer
initializer.init()

from dataservice.Merger import Merger
from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger

# logger
_logger = PandaLogger().getLogger('runMerger')

_logger.debug("================= start ==================")

# overall timeout value
overallTimeout = 60

# kill old process
try:
    # time limit
    timeLimit = datetime.datetime.utcnow() - datetime.timedelta(
        minutes=overallTimeout)
    # get process list
    scriptName = sys.argv[0]
    out = commands.getoutput(
        'env TZ=UTC ps axo user,pid,lstart,args | grep %s' % scriptName)
Example #25
import traceback
import brokerage.broker
from dataservice import DynDataDistributer
from dataservice.MailUtils import MailUtils
from dataservice.Notifier import Notifier
from taskbuffer.JobSpec import JobSpec
from userinterface import Client

from dataservice.DDM import rucioAPI

from config import panda_config
from pandalogger.PandaLogger import PandaLogger
from pandalogger.LogWrapper import LogWrapper

# logger
_logger = PandaLogger().getLogger('EventPicker')


class EventPicker:
    # constructor
    def __init__(self, taskBuffer, siteMapper, evpFileName, ignoreError):
        self.taskBuffer = taskBuffer
        self.siteMapper = siteMapper
        self.ignoreError = ignoreError
        self.evpFileName = evpFileName
        self.token = datetime.datetime.utcnow().isoformat(' ')
        # logger
        self.logger = LogWrapper(_logger, self.token)
        self.pd2p = DynDataDistributer.DynDataDistributer([],
                                                          self.taskBuffer,
                                                          self.siteMapper,
Example #26
import fcntl
import commands
import urllib
import shelve
import smtplib
import datetime
import time

from config import panda_config
from taskbuffer.OraDBProxy import DBProxy
from pandalogger.PandaLogger import PandaLogger
from dataservice.DDM import dq2Info
import taskbuffer.ErrorCode

# logger
_logger = PandaLogger().getLogger('Notifier')

# lock file
_lockGetMail = open(panda_config.lockfile_getMail, 'w')

# ignored DN
_ignoreList = [
    'Nurcan Ozturk',
    'Xin Zhao',
    'Dietrich Liko',
    ]

# NG (no-good) words in email addresses
_ngWordsInMailAddr = ['support','system','stuff','service','secretariat','club','user','admin',
                      'cvs','grid','librarian','svn','atlas','cms','lhcb','alice','alaelp']
Example #27
from pandalogger.PandaLogger import PandaLogger
tmpPandaLogger = PandaLogger()
tmpPandaLogger.lock()
tmpPandaLogger.setParams({'Type': 'retryModule'})
tmpLogger = tmpPandaLogger.getHttpLogger('dev')
tmpLogger.debug("This is only a test")
Example #28
import sys
import time
from pandalogger.PandaLogger import PandaLogger
from config import panda_config
import re
from re import error as ReError
# logger
_logger = PandaLogger().getLogger('RetrialModule')

NO_RETRY = 'no_retry'
INCREASE_MEM = 'increase_memory'
LIMIT_RETRY = 'limit_retry'
INCREASE_CPU = 'increase_cputime'


def pandalog(message):
    """
    Function to send message to panda logger.
    https://github.com/PanDAWMS/panda-jedi/blob/master/pandajedi/jediorder/JobGenerator.py#L405
    """
    try:
        # get logger and lock it
        tmpPandaLogger = PandaLogger()
        tmpPandaLogger.lock()
        # set category (usually prod) and type
        tmpPandaLogger.setParams({'Type': 'retryModule'})
        tmpLogger = tmpPandaLogger.getHttpLogger(panda_config.loggername)
        # send the message and release the logger
        tmpLogger.debug(message)
        tmpPandaLogger.release()
    except Exception as e:
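        # (assumed continuation; the source is truncated here) a failure to
        # send the log message must never break the caller
        pass
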
def updateJob(req,jobId,state,token=None,transExitCode=None,pilotErrorCode=None,pilotErrorDiag=None,timestamp=None,timeout=60,
              xml='',node=None,workdir=None,cpuConsumptionTime=None,cpuConsumptionUnit=None,remainingSpace=None,
              schedulerID=None,pilotID=None,siteName=None,messageLevel=None,pilotLog='',metaData='',
              cpuConversionFactor=None,exeErrorCode=None,exeErrorDiag=None,pilotTiming=None,computingElement=None,
              startTime=None,endTime=None,nEvents=None,nInputFiles=None,batchID=None,attemptNr=None,jobMetrics=None,
              stdout='',jobSubStatus=None,coreCount=None):
    _logger.debug("updateJob(%s)" % jobId)
    # get DN
    realDN = _getDN(req)
    # get FQANs
    fqans = _getFQAN(req)
    # check production role
    prodManager = _checkRole(fqans,realDN,jobDispatcher,site=siteName,hostname=req.get_remote_host())
    # check token
    validToken = _checkToken(token,jobDispatcher)
    _logger.debug("updateJob(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,attemptNr:%s,jobSubStatus:%s,core:%s,DN:%s,role:%s,token:%s,val:%s,FQAN:%s\n==XML==\n%s\n==LOG==\n%s\n==Meta==\n%s\n==Metrics==\n%s\n==stdout==\n%s)" %
                  (jobId,state,transExitCode,pilotErrorCode,pilotErrorDiag,node,workdir,cpuConsumptionTime,
                   cpuConsumptionUnit,remainingSpace,schedulerID,pilotID,siteName,messageLevel,nEvents,nInputFiles,
                   cpuConversionFactor,exeErrorCode,exeErrorDiag,pilotTiming,computingElement,startTime,endTime,
                   batchID,attemptNr,jobSubStatus,coreCount,realDN,prodManager,token,validToken,str(fqans),xml,pilotLog,metaData,jobMetrics,
                   stdout))
    _pilotReqLogger.info('method=updateJob,site=%s,node=%s,type=None' % (siteName,node))
    # invalid role
    if not prodManager:
        _logger.warning("updateJob(%s) : invalid role" % jobId)
        return Protocol.Response(Protocol.SC_Role).encode()        
    # invalid token
    if not validToken:
        _logger.warning("updateJob(%s) : invalid token" % jobId)
        return Protocol.Response(Protocol.SC_Invalid).encode()        
    # aborting message
    if jobId=='NULL':
        return Protocol.Response(Protocol.SC_Success).encode()
    # check status
    if not state in ['running','failed','finished','holding','starting','transferring']:
        _logger.warning("invalid state=%s for updateJob" % state)
        return Protocol.Response(Protocol.SC_Success).encode()        
    # pilot log
    if pilotLog != '':
        try:
            # make message
            message = pilotLog
            # get logger
            _pandaLogger = PandaLogger()
            _pandaLogger.lock()
            _pandaLogger.setParams({'Type':'pilotLog','PandaID':int(jobId)})
            logger = _pandaLogger.getHttpLogger(panda_config.loggername)
            # add message
            logger.info(message)                
            # release HTTP handler
            _pandaLogger.release()
        except:
            pass
    # create parameter map
    param = {}
    if cpuConsumptionTime != None:
        param['cpuConsumptionTime']=cpuConsumptionTime
    if cpuConsumptionUnit != None:
        param['cpuConsumptionUnit']=cpuConsumptionUnit
    if node != None:
        param['modificationHost']=node[:128]
    if transExitCode != None:
        param['transExitCode']=transExitCode
    if pilotErrorCode != None:
        param['pilotErrorCode']=pilotErrorCode
    if pilotErrorDiag != None:
        param['pilotErrorDiag']=pilotErrorDiag[:500]
    if jobMetrics != None:
        param['jobMetrics']=jobMetrics[:500]
    if schedulerID != None:
        param['schedulerID']=schedulerID
    if pilotID != None:
        param['pilotID']=pilotID[:200]
    if batchID != None:
        param['batchID']=batchID
    if exeErrorCode != None:
        param['exeErrorCode']=exeErrorCode
    if exeErrorDiag != None:
        param['exeErrorDiag']=exeErrorDiag[:500]
    if cpuConversionFactor != None:
        param['cpuConversion']=cpuConversionFactor
    if pilotTiming != None:
        param['pilotTiming']=pilotTiming
    if computingElement != None:
        param['computingElement']=computingElement
    if nEvents != None:
        param['nEvents']=nEvents
    if nInputFiles != None:
        param['nInputFiles']=nInputFiles
    if not jobSubStatus in [None,'']:
        param['jobSubStatus']=jobSubStatus
    if not coreCount in [None,'']:
        param['actualCoreCount']=coreCount
    if startTime != None:
        try:
            param['startTime']=datetime.datetime(*time.strptime(startTime,'%Y-%m-%d %H:%M:%S')[:6])
        except:
            pass
    if endTime != None:
        try:
            param['endTime']=datetime.datetime(*time.strptime(endTime,'%Y-%m-%d %H:%M:%S')[:6])
        except:
            pass
    if attemptNr != None:
        try:
            attemptNr = int(attemptNr)
        except:
            attemptNr = None
    if stdout != '':
        stdout = stdout[:2048]
    # invoke JD
    return jobDispatcher.updateJob(int(jobId),state,int(timeout),xml,siteName,
                                   param,metaData,attemptNr,stdout)
Example #30
def do_log_rollover():
    PandaLogger.doRollOver()
Example #31
import os
import re
import sys
import datetime
import traceback
from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger
from pandalogger.LogWrapper import LogWrapper
from brokerage.SiteMapper import SiteMapper

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('prioryMassage')
tmpLog = LogWrapper(_logger)

tmpLog.debug("================= start ==================")

# instantiate TB
taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)

# instantiate sitemapper
siteMapper = SiteMapper(taskBuffer)

# get usage breakdown
usageBreakDownPerUser = {}
usageBreakDownPerSite = {}
workingGroupList = []
for table in ['ATLAS_PANDA.jobsActive4', 'ATLAS_PANDA.jobsArchived4']:
Example #32
except ImportError:
    from StringIO import StringIO

import BaseHTTPServer
from SimpleHTTPServer import SimpleHTTPRequestHandler
if not "." in sys.path: sys.path.append('.')
try:
   from pmConfig import pmConfig as config
   from pmUtils import pmUtils as utils
   from pmServer.pmHandler import pmHandler
   from pmUtils.pmState import pmstate
   from pandalogger.PandaLogger import PandaLogger
except:
    print sys.path
    raise
_logger = PandaLogger().getLogger('pmMain')

def toLog(str,req):
    tmpstr = "ps h -p %s -o pcpu,pmem,cputime" % (os.getpid())
    tmplist = tmpstr.split()
    _logger.debug("%s %s: ppid=%s pid=%s cpu=%s%% mem=%s%% query=%s client=%s" % \
            (str, datetime.utcnow(), os.getppid(), os.getpid(), tmplist[0], tmplist[1], req.unparsed_uri, req.get_remote_host()))
    return

def mmCode(format):
   mmcode = {    'json'   : 'application/json'
               , 'script' : 'application/javascript'
               , 'html'   : 'text/html'
               , 'default': 'text/html'
             }
   return   mmcode.get(format,"text/html")
import commands
import optparse
import datetime
import cPickle as pickle

from dq2.common import log as logging
from dq2.common import stomp
from config import panda_config
from brokerage.SiteMapper import SiteMapper
from dataservice.Finisher import Finisher
from dataservice import DataServiceUtils


# logger
from pandalogger.PandaLogger import PandaLogger
_logger = PandaLogger().getLogger('fileCallbackListener')

# keep PID
pidFile = '%s/file_callback_listener.pid' % panda_config.logdir

# overall timeout value
overallTimeout = 60 * 59

# expiration time
expirationTime = datetime.datetime.utcnow() + datetime.timedelta(minutes=overallTimeout)


# kill whole process
def catch_sig(sig, frame):
    try:
        os.remove(pidFile)
import time
import threading
import sys
import aux
from aux import *
from datetime import datetime, timedelta

from sqlalchemy import exc

from config import panda_config
from pandalogger.PandaLogger import PandaLogger
import db_interface as dbif
from configurator.models import Schedconfig
from taskbuffer.TaskBuffer import taskBuffer

_logger = PandaLogger().getLogger('configurator')
_session = dbif.get_session()


class Configurator(threading.Thread):

    def __init__(self):
        threading.Thread.__init__(self)

        if hasattr(panda_config,'AGIS_URL_SITES'):
            self.AGIS_URL_SITES = panda_config.AGIS_URL_SITES
        else:
            self.AGIS_URL_SITES = 'http://atlas-agis-api.cern.ch/request/site/query/?json&vo_name=atlas&state=ACTIVE'
        _logger.debug('Getting site dump...')
        self.site_dump = aux.get_dump(self.AGIS_URL_SITES)
        _logger.debug('Done')
Example #35
from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger
from jobdispatcher.Watcher import Watcher
from brokerage.SiteMapper import SiteMapper
from dataservice.Finisher import Finisher
from dataservice.MailUtils import MailUtils
from taskbuffer import ProcessGroups
import taskbuffer.ErrorCode
import dataservice.DDM

# password
from config import panda_config
passwd = panda_config.dbpasswd

# logger
_logger = PandaLogger().getLogger('runRebro')

_logger.debug("===================== start =====================")

# memory checker
def _memoryCheck(str):
    try:
        proc_status = '/proc/%d/status' % os.getpid()
        procfile = open(proc_status)
        name   = ""
        vmSize = ""
        vmRSS  = ""
        # extract Name,VmSize,VmRSS
        for line in procfile:
            if line.startswith("Name:"):
                name = line.split()[-1]
Example #36
import subprocess
import hashlib
import os
import datetime

from pandalogger.PandaLogger import PandaLogger
# logger
_logger = PandaLogger().getLogger('ProxyCache')


def execute(program):
    """Run a program on the command line. Return stderr, stdout and status."""
    _logger.debug("executable: %s" % program)
    pipe = subprocess.Popen(program,
                            bufsize=-1,
                            shell=True,
                            close_fds=False,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    stdout, stderr = pipe.communicate()
    return stdout, stderr, pipe.wait()


def cat(filename):
    """Given filename, print its text contents."""
    f = open(filename, 'r')
    out = f.read()
    f.close()
    return out
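
# a minimal usage sketch of execute(); the proxy file path is hypothetical
stdout, stderr, status = execute('voms-proxy-info -timeleft -file /tmp/x509up_u123')
_logger.debug('status=%s timeleft=%s' % (status, stdout.strip()))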

Example #37
"""
WrappedCursor for a generic database connection proxy

"""

import re
import os
import sys
import warnings
from pandalogger.PandaLogger import PandaLogger
from config import panda_config

warnings.filterwarnings('ignore')

# logger
_logger = PandaLogger().getLogger('WrappedCursor')

# proxy
class WrappedCursor(object):

    # constructor
    def __init__(self, connection):
        # connection object
        self.conn = connection
        # cursor object
        self.cur = self.conn.cursor()
        # backend
        self.backend = panda_config.backend
        # statement
        self.statement = None
Example #38
#PanDA server libraries
from config import panda_config
from pandalogger.PandaLogger import PandaLogger

#Configurator libraries
from models import Site, PandaSite, DdmEndpoint, Schedconfig, Jobsactive4, SiteStats, PandaDdmRelation

#Read connection parameters
__host = panda_config.dbhost
__user = panda_config.dbuser
__passwd = panda_config.dbpasswd
__dbname = panda_config.dbname

#Instantiate logger
_logger = PandaLogger().getLogger('configurator_dbif')

#Log the SQL produced by SQLAlchemy
__echo = False

#Create the SQLAlchemy engine
try:
    __engine = sqlalchemy.create_engine("oracle://%s:%s@%s"%(__user, __passwd, __host), 
                                         echo=__echo)
except exc.SQLAlchemyError:
    _logger.critical("Could not load the DB engine: %s"%sys.exc_info())
    raise


def get_session():
    return sessionmaker(bind=__engine)()
Example #39
import re
import sys
import glob
import time
import os.path
import commands
import datetime
import threading
from config import panda_config
from taskbuffer.TaskBuffer import taskBuffer
from brokerage import SiteMapper
from dataservice.EventPicker import EventPicker
from pandalogger.PandaLogger import PandaLogger

# logger
_logger = PandaLogger().getLogger('evpPD2P')

_logger.debug("===================== start =====================")

# overall timeout value
overallTimeout = 300
# prefix of evp files
prefixEVP = 'evp.'
# file pattern of evp files
evpFilePatt = panda_config.cache_dir + '/' + prefixEVP + '*'

# kill old process
try:
    # time limit
    timeLimit = datetime.datetime.utcnow() - datetime.timedelta(minutes=overallTimeout)
    # get process list
import sys
from threading import Lock

from config import panda_config

# logger
from pandalogger.PandaLogger import PandaLogger
_logger = PandaLogger().getLogger('Initializer')

# initialize cx_Oracle using dummy connection to avoid "Unable to acquire Oracle environment handle"
class Initializer:
    def __init__(self):
        self.lock = Lock()
        self.first = True

    def init(self):
        _logger.debug("init new=%s" % self.first)
        # do nothing when nDBConnection is 0
        if panda_config.nDBConnection == 0:
            return True
        # lock
        self.lock.acquire()
        if self.first:
            self.first = False
            try:
                _logger.debug("connect")
                # connect
                if panda_config.backend == 'oracle':
                    import cx_Oracle
                    conn = cx_Oracle.connect(dsn=panda_config.dbhost,user=panda_config.dbuser,
                                             password=panda_config.dbpasswd,threaded=True)
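                    # (assumed continuation; the source is truncated here) the
                    # dummy connection only primes the client environment
                    conn.close()
            except Exception:
                _logger.error("connect failed %s" % str(sys.exc_info()))
        # release the lock and report success
        self.lock.release()
        return True
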
def updateJob(req,
              jobId,
              state,
              token=None,
              transExitCode=None,
              pilotErrorCode=None,
              pilotErrorDiag=None,
              timestamp=None,
              timeout=60,
              xml='',
              node=None,
              workdir=None,
              cpuConsumptionTime=None,
              cpuConsumptionUnit=None,
              remainingSpace=None,
              schedulerID=None,
              pilotID=None,
              siteName=None,
              messageLevel=None,
              pilotLog='',
              metaData='',
              cpuConversionFactor=None,
              exeErrorCode=None,
              exeErrorDiag=None,
              pilotTiming=None,
              computingElement=None,
              startTime=None,
              endTime=None,
              nEvents=None,
              nInputFiles=None,
              batchID=None,
              attemptNr=None,
              jobMetrics=None,
              stdout='',
              jobSubStatus=None,
              coreCount=None,
              maxRSS=None,
              maxVMEM=None,
              maxSWAP=None,
              maxPSS=None,
              avgRSS=None,
              avgVMEM=None,
              avgSWAP=None,
              avgPSS=None):
    tmpLog = LogWrapper(
        _logger, 'updateJob PandaID={0} PID={1}'.format(jobId, os.getpid()))
    tmpLog.debug('start')
    # get DN
    realDN = _getDN(req)
    # get FQANs
    fqans = _getFQAN(req)
    # check production role
    prodManager = _checkRole(fqans,
                             realDN,
                             jobDispatcher,
                             site=siteName,
                             hostname=req.get_remote_host())
    # check token
    validToken = _checkToken(token, jobDispatcher)
    # accept json
    acceptJson = req.acceptJson()
    _logger.debug(
        "updateJob(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,attemptNr:%s,jobSubStatus:%s,core:%s,DN:%s,role:%s,token:%s,val:%s,FQAN:%s,maxRSS=%s,maxVMEM=%s,maxSWAP=%s,maxPSS=%s,avgRSS=%s,avgVMEM=%s,avgSWAP=%s,avgPSS=%s\n==XML==\n%s\n==LOG==\n%s\n==Meta==\n%s\n==Metrics==\n%s\n==stdout==\n%s)"
        % (jobId, state, transExitCode, pilotErrorCode, pilotErrorDiag, node,
           workdir, cpuConsumptionTime, cpuConsumptionUnit, remainingSpace,
           schedulerID, pilotID, siteName, messageLevel, nEvents, nInputFiles,
           cpuConversionFactor, exeErrorCode, exeErrorDiag, pilotTiming,
           computingElement, startTime, endTime, batchID, attemptNr,
           jobSubStatus, coreCount, realDN, prodManager, token, validToken,
           str(fqans), maxRSS, maxVMEM, maxSWAP, maxPSS, avgRSS, avgVMEM,
           avgSWAP, avgPSS, xml, pilotLog, metaData, jobMetrics, stdout))
    _pilotReqLogger.info('method=updateJob,site=%s,node=%s,type=None' %
                         (siteName, node))
    # invalid role
    if not prodManager:
        _logger.warning("updateJob(%s) : invalid role" % jobId)
        return Protocol.Response(Protocol.SC_Role).encode(acceptJson)
    # invalid token
    if not validToken:
        _logger.warning("updateJob(%s) : invalid token" % jobId)
        return Protocol.Response(Protocol.SC_Invalid).encode(acceptJson)
    # aborting message
    if jobId == 'NULL':
        return Protocol.Response(Protocol.SC_Success).encode(acceptJson)
    # check status
    if not state in [
            'running', 'failed', 'finished', 'holding', 'starting',
            'transferring'
    ]:
        _logger.warning("invalid state=%s for updateJob" % state)
        return Protocol.Response(Protocol.SC_Success).encode(acceptJson)
    # pilot log
    tmpLog.debug('sending log')
    if pilotLog != '':
        try:
            # make message
            message = pilotLog
            # get logger
            _pandaLogger = PandaLogger()
            _pandaLogger.lock()
            _pandaLogger.setParams({'Type': 'pilotLog', 'PandaID': int(jobId)})
            logger = _pandaLogger.getHttpLogger(panda_config.loggername)
            # add message
            logger.info(message)
        except:
            tmpLog.debug('failed to send log')
        finally:
            tmpLog.debug('release lock')
            try:
                # release HTTP handler
                _pandaLogger.release()
            except:
                pass
    tmpLog.debug('done log')
    # create parameter map
    param = {}
    if cpuConsumptionTime != None:
        param['cpuConsumptionTime'] = cpuConsumptionTime
    if cpuConsumptionUnit != None:
        param['cpuConsumptionUnit'] = cpuConsumptionUnit
    if node != None:
        param['modificationHost'] = node[:128]
    if transExitCode != None:
        param['transExitCode'] = transExitCode
    if pilotErrorCode != None:
        param['pilotErrorCode'] = pilotErrorCode
    if pilotErrorDiag != None:
        param['pilotErrorDiag'] = pilotErrorDiag[:500]
    if jobMetrics != None:
        param['jobMetrics'] = jobMetrics[:500]
    if schedulerID != None:
        param['schedulerID'] = schedulerID
    if pilotID != None:
        param['pilotID'] = pilotID[:200]
    if batchID != None:
        param['batchID'] = batchID[:80]
    if exeErrorCode != None:
        param['exeErrorCode'] = exeErrorCode
    if exeErrorDiag != None:
        param['exeErrorDiag'] = exeErrorDiag[:500]
    if cpuConversionFactor != None:
        param['cpuConversion'] = cpuConversionFactor
    if pilotTiming != None:
        param['pilotTiming'] = pilotTiming
    if computingElement != None:
        param['computingElement'] = computingElement
    if nEvents != None:
        param['nEvents'] = nEvents
    if nInputFiles != None:
        param['nInputFiles'] = nInputFiles
    if not jobSubStatus in [None, '']:
        param['jobSubStatus'] = jobSubStatus
    if not coreCount in [None, '']:
        param['actualCoreCount'] = coreCount
    if maxRSS != None:
        param['maxRSS'] = maxRSS
    if maxVMEM != None:
        param['maxVMEM'] = maxVMEM
    if maxSWAP != None:
        param['maxSWAP'] = maxSWAP
    if maxPSS != None:
        param['maxPSS'] = maxPSS
    if avgRSS != None:
        param['avgRSS'] = avgRSS
    if avgVMEM != None:
        param['avgVMEM'] = avgVMEM
    if avgSWAP != None:
        param['avgSWAP'] = avgSWAP
    if avgPSS != None:
        param['avgPSS'] = avgPSS
    if startTime != None:
        try:
            param['startTime'] = datetime.datetime(
                *time.strptime(startTime, '%Y-%m-%d %H:%M:%S')[:6])
        except:
            pass
    if endTime != None:
        try:
            param['endTime'] = datetime.datetime(
                *time.strptime(endTime, '%Y-%m-%d %H:%M:%S')[:6])
        except:
            pass
    if attemptNr != None:
        try:
            attemptNr = int(attemptNr)
        except:
            attemptNr = None
    if stdout != '':
        stdout = stdout[:2048]
    # invoke JD
    tmpLog.debug('executing')
    return jobDispatcher.updateJob(int(jobId), state, int(timeout), xml,
                                   siteName, param, metaData, attemptNr,
                                   stdout, acceptJson)
Example #42
import traceback
import xml.dom.minidom
import ErrorCode
import uuid

import Closer

from config import panda_config
from pandalogger.PandaLogger import PandaLogger
from pandalogger.LogWrapper import LogWrapper
from taskbuffer import EventServiceUtils
from taskbuffer import retryModule
import taskbuffer.ErrorCode

# logger
_logger = PandaLogger().getLogger('Adder')

panda_config.setupPlugin()


class AdderGen:
    # constructor
    def __init__(self,
                 taskBuffer,
                 jobID,
                 jobStatus,
                 xmlFile,
                 ignoreTmpError=True,
                 siteMapper=None):
        self.job = None
        self.jobID = jobID
Example #43
def do_log_rollover():
    PandaLogger.doRollOver()
Example #44
import re
import sys
import copy
import traceback
from config import panda_config

# logger
from pandalogger.PandaLogger import PandaLogger
_logger = PandaLogger().getLogger('SiteMapper')

# PandaIDs
from PandaSiteIDs import PandaSiteIDs

# default site
from taskbuffer.SiteSpec import SiteSpec
from taskbuffer.NucleusSpec import NucleusSpec

defSite = SiteSpec()
defSite.sitename = panda_config.def_sitename
defSite.nickname = panda_config.def_nickname
defSite.dq2url = panda_config.def_dq2url
defSite.ddm_input = panda_config.def_ddm
defSite.ddm_output = panda_config.def_ddm
defSite.type = panda_config.def_type
defSite.gatekeeper = panda_config.def_gatekeeper
defSite.status = panda_config.def_status
defSite.setokens_input = {}
defSite.setokens_output = {}

worldCloudName = 'WORLD'
nucleusTag = 'nucleus:'
Example #45
'''

import re
import sys
import commands
import threading
from DDM import ddm
from config import panda_config

from brokerage.SiteMapper import SiteMapper

from pandalogger.PandaLogger import PandaLogger

# logger
_logger = PandaLogger().getLogger('Finisher')


class Finisher(threading.Thread):
    # constructor
    def __init__(self, taskBuffer, dataset, job=None, site=None):
        threading.Thread.__init__(self)
        self.dataset = dataset
        self.taskBuffer = taskBuffer
        self.job = job
        self.site = site

    # main
    def run(self):
        # start
        try:
Example #46
'''
wake up jobs in the waiting table

'''

import time
import threading
from DDM import ddm

from pandalogger.PandaLogger import PandaLogger

# logger
_logger = PandaLogger().getLogger('Waker')


class Waker (threading.Thread):
    # constructor
    def __init__(self,taskBuffer,dataset):
        threading.Thread.__init__(self)
        self.dataset = dataset
        self.taskBuffer = taskBuffer


    # main
    def run(self):
        _logger.debug("start: %s" % self.dataset.name)
        # get file list from DDM
        for iDDMTry in range(3):        
            status,out = ddm.DQ2.main('listFilesInDataset',self.dataset.name)
            if status != 0 and out.find("DQ2 unknown dataset exception") != -1:
                break
Example #48
import sys
from config import panda_config

# initialize cx_Oracle using dummy connection
from taskbuffer.Initializer import initializer
initializer.init()

from taskbuffer.TaskBuffer import taskBuffer
from pandalogger.PandaLogger import PandaLogger


# logger
_logger = PandaLogger().getLogger('boostUser')
_logger.debug("================= start ==================")

# instantiate TB
taskBuffer.init(panda_config.dbhost,panda_config.dbpasswd,nDBConnection=1)

user = sys.stdin.read()
user = user[:-1]

sql = "UPDATE atlas_panda.%s set currentPriority=:prio where prodUserName=:uname and prodSourceLabel IN (:label1,:label2) and currentPriority<:prio"
varMap = {}
varMap[':prio'] = 4000
varMap[':uname'] = user
varMap[':label1'] = 'user'
varMap[':label2'] = 'panda'
for table in ('jobsactive4','jobsdefined4'):
	_logger.debug((sql % table) + str(varMap))
	ret = taskBuffer.querySQLS(sql % table,varMap)
	_logger.debug('ret -> %s' % str(ret))
Example #49
import datetime
import cPickle as pickle
import stomp

from dq2.common import log as logging
from config import panda_config
from brokerage.SiteMapper import SiteMapper
from dataservice.Finisher import Finisher
from dataservice import DataServiceUtils

import logging
logging.basicConfig(level=logging.DEBUG)

# logger
from pandalogger.PandaLogger import PandaLogger
_logger = PandaLogger().getLogger('fileCallbackListener')

# keep PID
pidFile = '%s/file_callback_listener.pid' % panda_config.logdir

# overall timeout value
overallTimeout = 60 * 59

# expiration time
expirationTime = datetime.datetime.utcnow() + datetime.timedelta(
    minutes=overallTimeout)


# kill whole process
def catch_sig(sig, frame):
    try:
Example #50
import threading
import traceback
import ErrorCode

import taskbuffer.ErrorCode

from taskbuffer import EventServiceUtils
from taskbuffer import retryModule

from brokerage.PandaSiteIDs import PandaSiteIDs

from dataservice.Closer import Closer
from pandalogger.PandaLogger import PandaLogger

# logger
_logger = PandaLogger().getLogger('Watcher')


class Watcher(threading.Thread):
    # constructor
    def __init__(self,
                 taskBuffer,
                 pandaID,
                 single=False,
                 sleepTime=360,
                 sitemapper=None):
        threading.Thread.__init__(self)
        self.pandaID = pandaID
        self.taskBuffer = taskBuffer
        self.sleepTime = sleepTime
        self.single = single
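        # (assumed continuation; the source is truncated here) keep the site
        # mapper for later status updates
        self.sitemapper = sitemapper
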
based on Ilija Vukotic scripts here: https://github.com/ATLAS-Analytics/AlarmAndAlertService/blob/master/frontier-failed-q.py

These tasks are getting reassigned to a separate, throttled global-share with Frontier heavy tasks. Exceptions that
will not be reassigned are:
 - jobs that don't belong to any task (usually HammerCloud test jobs)
 - analysis tasks, since there has not been any unification yet
"""

import datetime
import sys


from elasticsearch import Elasticsearch

from pandalogger.PandaLogger import PandaLogger
_logger = PandaLogger().getLogger('frontier_retagging')

from config import panda_config
from taskbuffer.TaskBuffer import taskBuffer
taskBuffer.init(panda_config.dbhost, panda_config.dbpasswd, nDBConnection=1)

def get_frontier_failure_count_by_task():
    """
    retrieve failure count by task from Elastic Search
    """
    es_host = 'atlas-kibana.mwt2.org'
    es_port = 9200
    es_index = 'frontier'
    # es_index = 'frontier-%d-%02d' % (ct.year, ct.month)

    # prepare time window for query
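    # (assumed sketch; the source is truncated here) aggregate failed queries
    # over the last hour by task id; the field names '@timestamp' and 'taskid'
    # are assumptions about the index mapping
    end = datetime.datetime.utcnow()
    start = end - datetime.timedelta(hours=1)
    es = Elasticsearch([{'host': es_host, 'port': es_port}])
    body = {'size': 0,
            'query': {'range': {'@timestamp': {'gte': start.isoformat(),
                                               'lte': end.isoformat()}}},
            'aggs': {'tasks': {'terms': {'field': 'taskid'}}}}
    res = es.search(index=es_index, body=body)
    return res['aggregations']['tasks']['buckets']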
Example #52
from pandalogger.PandaLogger import PandaLogger
tmpPandaLogger = PandaLogger()
tmpPandaLogger.lock()
tmpPandaLogger.setParams({'Type':'retryModule'})
tmpLogger = tmpPandaLogger.getHttpLogger('dev')
tmpLogger.debug("This is only a test")