Example #1
def host_usagetest_consumer():
    '''host_usagetest_consumer() -> takes usage test results from producers of
       such a metric (1: Nagios, 2: URL of a UI) and populates the WMSMonitor database
    '''

    import os, commands, sys, fpformat
    sys.path.append('../common')
    import time
    import datetime
    import readconf_func
    import logging
    import socket
    import MySQLdb
    import urllib

    confvar = readconf_func.readconf()

    #CONNECTING TO DB
    #Opening mysql db connection
    logger = logging.getLogger('host_usagetest_consumer')
    logger.info("Starting db connection")
    try:
        db = MySQLdb.connection(host=confvar.get('WMSMON_DB_HOST'),
                                user=confvar.get('WMSMON_DB_USER'),
                                passwd=confvar.get('WMSMON_DB_PWD'),
                                db=confvar.get('WMSMON_DB_NAME'))

    except Exception, e:
        stri2 = "ERROR CONNECTING TO WMSMonitor DB: " + str(e)
        logger.error(stri2)
        logger.error(
            "ERROR: Please check mysql daemon is running and connection parameters are correct!"
        )
        sys.exit(1)
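
This and several of the later examples open the WMSMonitor database with the low-level MySQLdb.connection class and exit on failure. For comparison, here is a minimal sketch of the same step using the high-level MySQLdb.connect API; open_wmsmon_db is a hypothetical helper name, and confvar is assumed to be the dict-like object returned by readconf_func.readconf():

import sys
import logging
import MySQLdb

logger = logging.getLogger('host_usagetest_consumer')

def open_wmsmon_db(confvar):
    # Open the WMSMonitor DB; exits the process on failure, as the examples do.
    try:
        db = MySQLdb.connect(host=confvar.get('WMSMON_DB_HOST'),
                             user=confvar.get('WMSMON_DB_USER'),
                             passwd=confvar.get('WMSMON_DB_PWD'),
                             db=confvar.get('WMSMON_DB_NAME'))
    except MySQLdb.Error, e:
        logger.error("ERROR CONNECTING TO WMSMonitor DB: " + str(e))
        logger.error("ERROR: Please check mysql daemon is running and connection parameters are correct!")
        sys.exit(1)
    return db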
Example #2
def wms_balancing_arbiter():
        '''wms_balancing_arbiter() -> updates the wms instances available behind an alias,
           depending on the load of the instances according to the load metric provided by
           the wms_balancing_metric function.
           Returns None if errors are raised during calculation.
        '''

        import os, commands, sys, fpformat
        sys.path.append('../common')
        import time
        import datetime
        import readconf_func
        import logging
        import socket
        import MySQLdb
        import logpredef

        logger = logging.getLogger('wms_balancing_arbiter')
        conf=readconf_func.readconf()

        #+++++++++++++++++++++++++++++
        #Opening mysql db connection
        try:
              db = MySQLdb.connection(host=conf.get('WMSMON_DB_HOST'),
                                      user=conf.get('WMSMON_DB_USER'),
                                      passwd=conf.get('WMSMON_DB_PWD'),
                                      db=conf.get('WMSMON_DB_NAME'))
              logger.info("Starting db connection")
        except Exception, e:
              strxx = "ERROR CONNECTING TO WMSMonitor DB: " + str(e)
              logger.error(strxx)
              logger.error("ERROR: Please check mysql daemon is running and connection parameters are correct!")
              sys.exit(1)
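
Both examples fetch a named logger that is assumed to be configured elsewhere (here via the imported logpredef module). A minimal stand-in configuration using only the standard library might look like this; the format string and level are illustrative assumptions:

import logging

# Stand-in for the logpredef module used above: configure the root logger
# once, so logging.getLogger('wms_balancing_arbiter') and friends emit records.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(name)s %(levelname)s %(message)s')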
Example #3
    def run(self):
        #INITIALIZATION
        logger = logging.getLogger('data_collector')
        TIME_AT_START = time.time()
        logger.info('THIS IS WMSMonitor data_collector_daemon')
        logger.info('Reading wmsmon conf file')
        confvar = readconf_func.readconf()

        #CONNECTING TO DB
        #Opening mysql db connection
        logger.info("Starting db connection")
        try:
            db = MySQLdb.connection(host=confvar.get('WMSMON_DB_HOST'),
                                    user=confvar.get('WMSMON_DB_USER'),
                                    passwd=confvar.get('WMSMON_DB_PWD'),
                                    db=confvar.get('WMSMON_DB_NAME'))

        except Exception, e:
            errmsg = "ERROR CONNECTING TO WMSMonitor DB: " + str(e)  # avoid shadowing the str builtin
            logger.error(errmsg)
            logger.error(
                "ERROR: Please check mysql daemon is running and connection parameters are correct!"
            )
            sys.exit(1)
Example #4
File: cft.py  Project: dcesini/CFT
        def run(self):
                #INITIALIZATION
                logger = logging.getLogger('cft')
                TIME_AT_START = time.time()
                confvar = readconf_func.readconf()
                logger.info('#########################################')
                logger.info('## This is the CNAF File Transfer tool ##')
                logger.info('#########################################')
                timenow_str = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(TIME_AT_START))
                logger.info('cft.py started on ' + timenow_str)
                logger.info('Configuration file read: ' + confvar['CONF_FILE1'])
                logger.info('Configuration file read: ' + confvar['CONF_FILE2'])
                #############################################################
                ### Look for already running wrapper process
                pname = 'cft.py'
                RUNNING = check_running_func.check_running(pname)
                if RUNNING:
                   logger.error('Another cft.py is running. Aborting')
                   sys.exit(1)
                ##############################################################

                #CHECKING MSG PATHS
                if (os.access(confvar.get('INPUT_FILES_PATH'),os.F_OK) == False):
                    logger.error('NOT EXISTING DIRECTORY: ' + confvar.get('INPUT_FILES_PATH') + '. Please check configuration file\n')
                    sys.exit(1)
                if (os.access(confvar.get('PROCESSED_FILES_PATH'),os.F_OK) == False):
                    logger.error('NOT EXISTING DIRECTORY: ' + confvar.get('PROCESSED_FILES_PATH') + '. Please check configuration file\n')
                    sys.exit(1)

                #Starting daemon
                while True:
                      #Checking for new DATA Messages
                      msg_list = os.listdir(confvar.get('INPUT_FILES_PATH'))  # renamed to avoid shadowing the list builtin
                      if len(msg_list) == 0:
                         logger.info("No new file to process")
                      for msg in msg_list:
                          if (os.access(confvar.get('INPUT_FILES_PATH') + '/' + msg,os.F_OK) == True):
                              #ACCESSING INPUT FILE
                              logger.info('Working on file: ' + msg)
                              msghdl = open(confvar.get('INPUT_FILES_PATH') + '/' + msg,'r')
                              lines = msghdl.readlines()
                              for line in lines:
                                  logger.info(line)
                                  linesp = line.split(' ')
                                  if len(linesp) < 2:
                                     logger.error("File " + msg + " wrongly formatted. Please check manually. Exiting!")
                                     sys.exit(1)
                                  else:
                                     file_in = linesp[0].strip()
                                     file_out = linesp[1].strip()
                                     logger.info("file_in = " + file_in)
                                     logger.info("file_out = " + file_out)
                                     globus_url_copy_func.globus_url_copy(confvar,file_in,file_out)
                              msghdl.close()
                              status = os.system('mv ' + confvar.get('INPUT_FILES_PATH') + '/' + msg + ' ' + confvar.get('PROCESSED_FILES_PATH'))
                              if status != 0:
                                 logger.error('Cannot move processed file to ' + confvar.get('PROCESSED_FILES_PATH') + '. Please check manually. Exiting!\n')
                                 sys.exit(1)

                      logger.info("Waiting for 5 seconds before checking for new files")
                      time.sleep(5)
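
The daemon archives processed files by shelling out to mv, which silently breaks on paths containing spaces or shell metacharacters. A minimal sketch of the same step done in-process with the standard library; the names confvar, msg, and logger are the ones bound in the loop above:

import os
import sys
import shutil

# Move one processed message file into PROCESSED_FILES_PATH without a shell.
src = os.path.join(confvar.get('INPUT_FILES_PATH'), msg)
dst = confvar.get('PROCESSED_FILES_PATH')
try:
    shutil.move(src, dst)
except (shutil.Error, OSError, IOError), e:
    logger.error('Cannot move processed file to ' + dst + ': ' + str(e) +
                 '. Please check manually. Exiting!\n')
    sys.exit(1)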
Example #5
#!/usr/bin/env python

# J.M. Dana
# [email protected]
import sys, os
#sys.path.append('./stomp/')
import stomp
import time
import logpredef_wmslb
import logging
import readconf_func
from socket import gethostname

logger = logging.getLogger('send_data_to_activemq')

confvar = readconf_func.readconf()
sender_hostname = gethostname()


class MyListener(object):
    def on_connecting(self, host_and_port):
        logger.info('connecting...')
        #self.c.connect(wait=True)

    def on_disconnected(self):
        logger.info("lost connection")

    def on_message(self, headers, body):
        #self.__print_async("MESSAGE", headers, body)
        logger.info('MESSAGE')
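
The listener class above is only useful once it is attached to a connection. A minimal usage sketch for an older stomp.py release; the broker host, port, listener name, and destination are illustrative assumptions, and the exact connection calls vary between stomp.py versions:

# Hypothetical broker endpoint; the real host/port come from deployment config.
conn = stomp.Connection(host_and_ports=[('activemq.example.org', 61613)])
conn.set_listener('wmsmon', MyListener())
conn.start()             # needed on older stomp.py releases; removed in newer ones
conn.connect(wait=True)
# destination name is illustrative; subscribe signatures vary across stomp.py versions
conn.subscribe(destination='/topic/wmsmonitor', ack='auto')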
Example #6
def wms_balancing_metric(WMS):
    '''wms_balancing_metric() -> returns a list containing:
           memusage, loadcpulimit, memlimit, disklimit, fdrain, fload, ftraversaltime
           the metric for wms load balancing is:
              <0 if (service is failing) || (service is in drain)
              >0 if (service is available)  N.B. the higher the number, the higher the load on the wms
           Returns None if errors are raised during calculation.
    '''

    import os, commands, sys, fpformat
    sys.path.append('../../common')
    sys.path.append('../../common/classes')
    import time, urllib
    import datetime
    import readconf_func
    import f_metric_func
    import wms_class
    import socket

    #Initializing
    import logging
    logger = logging.getLogger('wms_balancing_metric')

    def mk_float_or_0(val):
        try:
            fval = float(val)
        except:
            return 0
        return fval

#INITIALIZATION

    confvar = readconf_func.readconf()
    fdrain = 1
    fload = 1
    ftraversaltime = 1
    loadcpulimit = 15
    memlimit = 99
    memusage = 1
    disklimit = 90
    wmsdata = []

    ###########   LOAD BALANCING PARAMETERS    #####################
    LATENCY = 0  #confvar.get('LATENCY')
    LATENCY_PATH = ' '  #confvar.get('LATENCY_PATH')
    SUBMISSION_TEST = 0  # confvar.get('SUBMISSION_TEST')
    NAGIOS_PATH = ' '  #confvar.get('NAGIOS_PATH')
    LOAD_BALANCING_SITE_CONTACT = 'root@localhost'  #confvar.get('LOAD_BALANCING_SITE_CONTACT')
    ####################################################

    #########################################################
    #Calculating fdrain component
    #checks on daemons
    logger.info('checking daemons')
    for dae in WMS.daemons_dict.itervalues():
        if dae != '0' and dae != 'Null':
            logger.info('fdrain = -1 because of daemons:' + str(dae))
            fdrain = -1
            break

    env_script = confvar.get('ENV_FILE')

    #checking whether the wms has been manually put in drain
    cmddrain = '. ' + env_script + '; echo $GLITE_LOCATION_VAR'
    stddrain = os.popen(cmddrain)
    strtmp = stddrain.readlines()
    drainfile = strtmp[0].strip() + '/.drain'
    if (os.access(drainfile, os.F_OK) == True):
        logger.info('fdrain = -1 because of drainfile presence')
        fdrain = -1

    #checking whether the wms is in autodrain for overload detection
    #cmd = "grep glite_wms_wmproxy_load_monitor /etc/glite-wms/glite_wms.conf |grep jobSubmit"
    cmdwmsconfig = '. ' + env_script + '; echo $GLITE_WMS_CONFIG_DIR'
    stddrain = os.popen(cmdwmsconfig)
    strtmp = stddrain.readlines()
    WMSFILE = strtmp[0].strip() + '/glite_wms.conf'
    cmd = "grep glite_wms_wmproxy_load_monitor " + WMSFILE + " |grep jobSubmit"
    std = os.popen(cmd)
    stdstr1 = std.readlines()
    # if everything is ok....
    if (len(stdstr1) > 0):
        cmd = ". " + env_script + ";" + stdstr1[
            0][stdstr1[0].find("\"") +
               1:stdstr1[0].find("\"", stdstr1[0].find("\"") + 1)]
        logger.info("invoking jobsubmit script: " + cmd)
        status = os.system(cmd + ' > /dev/null 2>&1')
        if (status != 0):
            logger.info(
                'fdrain = -1 because exit status is != 0 for command :')
            logger.info(cmd)
            fdrain = -1
        std = os.popen(cmd)
        stdstr = std.readlines()
        if (len(stdstr) > 0):
            try:
                loadcpulimit = [
                    x for x in stdstr
                    if x.startswith('Threshold for Load Average(15 min)')
                ][0].split()[5].strip()
                memlimit = [
                    x for x in stdstr
                    if x.startswith('Threshold for Memory Usage')
                ][0].split()[4].strip()
                memusage = [
                    x for x in stdstr
                    if x.startswith('Threshold for Memory Usage')
                ][0].split()[len(stdstr[3].split()) - 1].strip('%')
                disklimit = [
                    x for x in stdstr if x.startswith('Threshold for Disk')
                ][0].split()[4].strip('%')

            except:
                logger.error(
                    "Unable to parse /sbin/glite_wms_wmproxy_load_monitor script output "
                )
                return None

    else:
        logger.error("Problem reading glite_wms.conf file")
        return None

    #if status == 1:
    #      fdrain = -1;

    server_hostname = socket.getfqdn()

    #Site Nagios Submission Test
    nagiossubtest = 1
    if SUBMISSION_TEST == '1':
        nagiossubtest = 1
        cmd1 = 'wget -q ' + NAGIOS_PATH
        logger.info("BALANCING COMMAND: " + cmd1)
        if (os.system(cmd1) == 0):
            #checking date
            cmdcheck = 'grep ' + server_hostname + ' ' + NAGIOS_PATH.split(
                '/')[NAGIOS_PATH.count('/')]
            std = os.popen(cmdcheck)
            stdstr = std.readlines()
            if (len(stdstr) > 0):
                timestr = stdstr[0].split('\t')[0]
                deltatime = int(time.time()) - int(
                    time.mktime(time.strptime(timestr, "%Y-%m-%d %H:%M:%S")))
                logger.info("NAGIOS DELTATIME: " + deltatime)
                print "NAGIOS DELTATIME: ", deltatime, '\n'
                if (deltatime < 3600):
                    logger.info("NAGIOS CURRENT STATUS SUBTEST: " +
                                stdstr[0].split('\t')[1])
                    if stdstr[0].split('\t')[1] == '2':
                        nagiossubtest = -1
                    elif stdstr[0].split('\t')[1] == '3':
                        SENDMAIL = "/usr/sbin/sendmail"  # sendmail location
                        p = os.popen("%s -t" % SENDMAIL, "w")
                        p.write("To: " + LOAD_BALANCING_SITE_CONTACT + "\n")
                        p.write(
                            "Subject: WARNING NAGIOS LOAD BALANCING SUBMISSION TEST FAILS\n"
                        )
                        p.write(
                            "\n")  # blank line separating headers from body
                        p.write("WARNING: SUBMISSION TEST TOO OLD!!!\n\n\n")
                        p.write("FILE : " + NAGIOS_PATH + "\n")
                        sts = p.close()
                        if sts != 0:
                            logger.info("Sendmail exit status" + str(sts))

                else:
                    SENDMAIL = "/usr/sbin/sendmail"  # sendmail location
                    p = os.popen("%s -t" % SENDMAIL, "w")
                    p.write("To: " + LOAD_BALANCING_SITE_CONTACT + "\n")
                    p.write(
                        "Subject: WARNING NAGIOS LOAD BALANCING SUBMISSION TEST FAILS\n"
                    )
                    p.write("\n")  # blank line separating headers from body
                    p.write("WARNING: SUBMISSION TEST TOO OLD!!!\n\n\n")
                    p.write("FILE : " + NAGIOS_PATH + "\n")
                    p.write("For wmsserver: " + server_hostname + "\n")
                    sts = p.close()
                    if sts != 0:
                        logger.info("Sendmail exit status" + str(sts))
            cmdrm = 'rm -f ' + NAGIOS_PATH.split('/')[NAGIOS_PATH.count('/')]
            status = os.system(cmdrm + ' 2>&1')

        else:
            SENDMAIL = "/usr/sbin/sendmail"  # sendmail location
            p = os.popen("%s -t" % SENDMAIL, "w")
            p.write("To: " + LOAD_BALANCING_SITE_CONTACT + "\n")
            p.write(
                "Subject: WARNING NAGIOS LOAD BALANCING SUBMISSION TEST FAILS\n"
            )
            p.write("\n")  # blank line separating headers from body
            p.write("WARNING: COULD NOT READ SUBMISSION TEST RESULTS!!!\n")
            p.write("PROBLEMS WHILE DOWNLOADING FILE : " + NAGIOS_PATH + "\n")
            p.write("For wmsserver: " + server_hostname + "\n")
            sts = p.close()
            if sts != 0:
                logger.info("Sendmail exit status" + str(sts))

    if fdrain > 0:
        fdrain = fdrain * nagiossubtest

#CMS Latency Monitor Submission Test
    latencysubtest = 1
    deltatime = 1800  # default when no timestamp can be read: treat the test as stale
    if LATENCY == '1':
        cmd1 = 'wget -q ' + LATENCY_PATH + server_hostname + '.log'
        if (os.system(cmd1) == 0):
            #checking date
            cmdcheck = 'tail -1 submit-tracks_' + server_hostname + '.log |awk \'{print $1}\''
            std = os.popen(cmdcheck)
            stdstr = std.readlines()
            if (len(stdstr) > 0):
                timestr = stdstr[0].strip(':\n')
                deltatime = int(time.time()) - int(
                    time.mktime(time.strptime(timestr, "%Y-%m-%d@%H.%M.%S")))

            if deltatime < 1800:
                cmd2 = 'tail -1 submit-tracks_' + server_hostname + '.log|grep -c " done in"'
                std = os.popen(cmd2)
                stdstr = std.readlines()
                if (len(stdstr) > 0):
                    latencysubtest = int(stdstr[0].strip('\n'))
                    if latencysubtest == 0:
                        latencysubtest = -1
            else:
                SENDMAIL = "/usr/sbin/sendmail"  # sendmail location
                p = os.popen("%s -t" % SENDMAIL, "w")
                p.write("To: " + LOAD_BALANCING_SITE_CONTACT + "\n")
                p.write(
                    "Subject: WARNING LATENCY LOAD BALANCING SUBMISSION TEST FAILS\n"
                )
                p.write("\n")  # blank line separating headers from body
                p.write("WARNING: SUBMISSION TEST TOO OLD!!!\n\n\n")
                p.write("FILE : " + LATENCY_PATH + server_hostname + ".log\n")
                sts = p.close()
                if sts != 0:
                    logger.info("Sendmail exit status" + str(sts))
        else:
            SENDMAIL = "/usr/sbin/sendmail"  # sendmail location
            p = os.popen("%s -t" % SENDMAIL, "w")
            p.write("To: " + LOAD_BALANCING_SITE_CONTACT + "\n")
            p.write(
                "Subject: WARNING LATENCY LOAD BALANCING SUBMISSION TEST FAILS\n"
            )
            p.write("\n")  # blank line separating headers from body
            p.write("WARNING: COULD NOT READ SUBMISSION TEST RESULTS!!!\n")
            p.write("PROBLEMS WHILE DOWNLOADING FILE : " + LATENCY_PATH +
                    server_hostname + ".log\n")
            sts = p.close()
            if sts != 0:
                logger.info("Sendmail exit status" + str(sts))

    if fdrain > 0:
        fdrain = fdrain * latencysubtest

    #Calculating load metric
    logger.info("Building load metric")
    #	print disklimit

    fload = f_metric_func.f_metric(mk_float_or_0(
        WMS['cpu_load']), loadcpulimit, 0) + f_metric_func.f_metric(
            memusage, memlimit, 1) + f_metric_func.f_metric(
                mk_float_or_0(WMS['disk_sandbox']), disklimit, 1)

    #Calculating traversaltime  metric
    ftraversaltime = min(
        f_metric_func.f_metric(mk_float_or_0(WMS['wm_queue']), 500, 0), 1
    ) + min(f_metric_func.f_metric(mk_float_or_0(
        WMS['jc_queue']), 500, 0), 1) + min(
            f_metric_func.f_metric(mk_float_or_0(WMS['lb_event']), 3000, 0), 1)

    #summing metric components

    if fdrain > 0:
        load_balancing_metric = fload + ftraversaltime
    else:
        load_balancing_metric = fdrain

    #writing resulting metric to file
    filename = confvar.get('LOAD_BALANCING_FILENAME')
    try:
        logger.info('Trying to open file : ' + filename)
        f = open(filename, mode='a')

        logger.info("writing load balancing metric to file: fdrain=" +
                    str(fdrain) + ", fload= " + str(fload) +
                    ", ftraversaltime= " + str(ftraversaltime))
        f.write(str(load_balancing_metric) + '\n')
        f.close()
    except IOError:
        logger.error('CANNOT ACCESS FILE : ' + filename)

#{'wm_queue':null_str,'jc_queue':null_str,'lb_event':null_str,'loadb_fdrain':null_str,'loadb_ftraversaltime':null_str,'loadb_fload':null_str,'loadb_fmetric':null_str,'condor_running':null_str,'condor_idle':null_str,'condor_current':null_str,'ism_size':null_str,'ism_entries':null_str,'gftp_con':null_str,'FD_WM':null_str,'FD_LM':null_str,'FD_JC':null_str,'FD_LL':null_str,'loadb_memusage':null_str,'ice_running':null_str,'ice_idle':null_str,'ice_pending':null_str,'ice_held':null_str,'ice_queue':null_str,'cpu_load':null_str}
    WMS['loadb_memusage'] = float(memusage) / float(memlimit)
    #WMS['loadcpulimit'] = loadcpulimit
    #WMS['memlimit'] = memlimit  #removed from class to be returned as %
    #WMS['disklimit'] = disklimit # removed from class to be returned as %
    WMS['loadb_fdrain'] = fdrain
    WMS['loadb_fload'] = fload
    WMS['loadb_ftraversaltime'] = ftraversaltime
    WMS['loadb_fmetric'] = load_balancing_metric
    #        logger.info('memusage: ' + str(WMS['memusage']))
    #        logger.info('fdrain: '+ str(WMS['fdrain']))
    #        logger.info('fload: '+ str(WMS['fload']))
    #        logger.info('ftraversaltime: '+ str(WMS['ftraversaltime']))
    #        logger.info('fmetric: '+ str(WMS['fmetric']))

    #f=open('/tmp/loadbalancingtest.txt', mode = 'a')
    #strtest=  str(int(time.time())) + ' ' + str(load_balancing_metric) + ' ' + str(fdrain) + ' ' + str(fload) + ' ' + str(ftraversaltime) + ' ' + str(memusage) + ' ' + str(memlimit) + ' ' + WMS['load'] + ' ' + str(loadcpulimit) + ' ' + WMS['sandbox'] + ' ' + str(disklimit) + ' ' + WMS['input_fl'] + ' ' + '1000' + ' ' + WMS['queue_fl'] + ' ' + '1000' + ' ' + WMS['dg20'] + ' ' + '3000'
    #f.write(strtest + '\n')
    #f.close()
    cmd3 = 'rm -f submit-tracks_' + server_hostname + '.log'
    os.system(cmd3)

    return 0
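
The final metric is assembled from three components: fdrain (a gate that goes negative when the service is failing or draining), fload (resource pressure), and ftraversaltime (queue depth). The combination rule used above, isolated as a pure function for clarity; combine_balancing_metric is a hypothetical helper, not part of the original module:

def combine_balancing_metric(fdrain, fload, ftraversaltime):
    '''Combine the load-balancing components as the sensor above does:
       a non-positive fdrain (failing or draining WMS) is published as-is,
       otherwise the published load is fload + ftraversaltime (higher = busier).'''
    if fdrain > 0:
        return fload + ftraversaltime
    return fdrain

assert combine_balancing_metric(-1, 2, 1) == -1   # draining node publishes the gate value
assert combine_balancing_metric(1, 2, 1) == 3     # healthy node publishes its load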
Example #7
import sys, os, time

nday = int(sys.argv[1])
if nday > 0:
   print "\nUsage:\n"
   print "globus_error_detector.py NDAYAGO\n"
   print "ERROR: NDAYAGO must be <=0 "
   sys.exit(1)

SENSORFLAG=int(sys.argv[2])
if ((SENSORFLAG != 0) and (SENSORFLAG != 1)):
   print "\nUsage:\n"
   print "globus_error_detector.py NDAYAGO SENSORFLAG\n"
   print "ERROR: SENSORFLAG must be either 0 or 1 "
   sys.exit(1)

import readconf_func
confvar=readconf_func.readconf();

if SENSORFLAG:

        #JUST PRODUCING DATA FOR WMSMonitor SENSOR
        data = "\"" + time.strftime("%d %b", time.localtime(time.time() + nday*86400)) + "\""  # 86400 seconds per day
#        datafile=time.strftime("%d%b",time.localtime(time.time()+nday*86400))
        datacalc = time.localtime(time.time() + nday*86400)[1:3]
        print "Globus error detector starts on date: " + data
#        confvar={'GLITE_LOG_DIR':'/var/log/glite','SITE_CONTACT': '[email protected],[email protected],[email protected]'}

        logfile = '' 
        if ((os.access(confvar.get('GLITE_LOG_DIR') + '/logmonitor_events.log',os.F_OK) == True ) and
            (os.access(confvar.get('GLITE_LOG_DIR') + '/logmonitor_events.log.1',os.F_OK) == False)):
           logfile = confvar.get('GLITE_LOG_DIR') + '/logmonitor_events.log'
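
The two argv checks above will themselves crash with an IndexError or ValueError if an argument is missing or non-numeric. A minimal sketch of stricter validation with the same usage text and exit behavior; parse_args is a hypothetical helper:

import sys

def parse_args(argv):
    # Validate NDAYAGO (<= 0) and SENSORFLAG (0 or 1), printing usage on error.
    try:
        nday = int(argv[1])
        sensorflag = int(argv[2])
    except (IndexError, ValueError):
        print "\nUsage:\n"
        print "globus_error_detector.py NDAYAGO SENSORFLAG\n"
        sys.exit(1)
    if nday > 0 or sensorflag not in (0, 1):
        print "\nUsage:\n"
        print "globus_error_detector.py NDAYAGO SENSORFLAG\n"
        print "ERROR: NDAYAGO must be <=0 and SENSORFLAG either 0 or 1"
        sys.exit(1)
    return nday, sensorflag

nday, SENSORFLAG = parse_args(sys.argv)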
Example #8
def lb_query(lbhost,STARTDATE,ENDDATE,DBTYPE):

   #Initializing logger
   import logging
   logger = logging.getLogger('lb_query')

   confvar = readconf_func.readconf();

   users_stats = []   
   # Establish a connection

   if DBTYPE == 'LBPROXY':
      lbhost = confvar['LBPROXY_DB_HOST']
      dbuser = confvar['LBPROXY_DB_USER']
      dbname = confvar['LBPROXY_DB_NAME']
   elif DBTYPE == 'LBSERVER':
      lbhost = confvar['LB_DB_HOST']
      dbuser = confvar['LB_DB_USER']
      dbname = confvar['LB_DB_NAME']

   logger.info('Establishing a connection with mysql DB')
   db = MySQLdb.connection(host=lbhost, user=dbuser, db=dbname,
                           passwd=confvar['SERVER_MYSQL_PASSWORD'][1:-1])

################ MAIN DATA CONTAINER LIST INITIALIZATION ######
   wmsdata_list = []
###############################################################

   def put_into_wmsdata(wmsdata_list,wmshostname,userdn,fieldlist,valuelist):
      wmsFOUND = False
      for wmsdata in wmsdata_list:
         if wmsdata.host == wmshostname:
            wmsFOUND = True
            try:
               wmsdata.add_user(userdn)
            except wmsdata_class.UserPresent:
#              logger.warning('User Already present in wmdata for host: ' + wmsdata.host)
               for field in fieldlist:
                  wmsdata[userdn][field] = valuelist[fieldlist.index(field)]
      if not wmsFOUND:
         wmsdata = wmsdata_class.wmsdata(wmshostname)
         wmsdata.add_user(userdn)
         for field in fieldlist:
            wmsdata[userdn][field] = valuelist[fieldlist.index(field)]
         wmsdata_list.append(wmsdata)


   # Run a MySQL query to find the number of single jobs submitted in a given time interval PER USER and PER WMS
   logger.info('Running a MySQL query to find the number of single jobs submitted in a given time interval PER USER and PER WMS')
   querystr = "select users.cert_subj,host,COUNT(DISTINCT(events.jobid)) from events,short_fields inner join users on events.userid=users.userid where events.event=short_fields.event and code='17' and time_stamp>'" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' and events.jobid=short_fields.jobid and name='NSUBJOBS' and value='0' group by users.cert_subj,host;"
   logger.info('Query is : ' + querystr)   
   db.query(querystr)
   r = db.store_result()
   # Iterate through the result set
   WMP_in = 0
   if r:
      for i in range(1,r.num_rows() + 1):
        row = r.fetch_row()
      #  logger.debug('FOUND ROW: ' + row )
        if row:
          dn = row[0][0]
          rowhost = row[0][1]
          rowWMP_in = row[0][2]

          put_into_wmsdata(wmsdata_list,rowhost,dn,['WMP_in'],[rowWMP_in])          

######################################################################################################################
### We no longer take the avg and the std of nodes per collection because they are not summable across multiple LBs.
### What we do instead is take, PER USER, the total number of jobs in collections and the min and max of nodes per collection.
### These are summable, and the avg calculation can be done on the collector side.
### We also sum over users on the sensor side and return the total number of collection jobs and the min and max of nodes PER WMS.
### Summing over wmsdata will be done at the end of this function or in the wrapper if the wmsdata_list is returned.
##########################################################################################################################

# Run a query to find, per user and per host, the number of collections, the total number of nodes in collections, and the min and max of nodes per collection

   logger.info('Running a query to find, per user and per host, the number of collections, the total number of nodes in collections, and the min and max of nodes per collection')
   querystr = "select users.cert_subj, host, COUNT(value), sum(value), min(value),max(value) from events,short_fields inner join users on events.userid=users.userid where events.event=short_fields.event and code='17' and time_stamp>'" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' and events.jobid=short_fields.jobid and name='NSUBJOBS' and short_fields.event='0' and value>'0' group by users.cert_subj,host"
   logger.info('Query is : ' + querystr)
   db.query(querystr)
   r = db.store_result()
   # Iterate through the result set
   if r:
      for i in range(1,r.num_rows() + 1):
         row = r.fetch_row()
         if row:
            dn = row[0][0]
            rowhost = row[0][1]
            rowWMP_in_col = row[0][2]
            rowWMP_in_col_nodes = row[0][3]
            rowWMP_in_col_min_nodes = row[0][4]
            rowWMP_in_col_max_nodes = row[0][5]

            put_into_wmsdata(wmsdata_list,rowhost,dn,['WMP_in_col','WMP_in_col_nodes','WMP_in_col_min_nodes','WMP_in_col_max_nodes'],[rowWMP_in_col,rowWMP_in_col_nodes,rowWMP_in_col_min_nodes,rowWMP_in_col_max_nodes])
                 
#  Run a query to find PER USER and PER WMS the number of jobs enqueued to WM from WMP in a given time interval
   logger.info("Run a query to find PER USER and PER WMS the number of jobs enqueued to WM from WMP in a given time interval")
   querystr = "select  users.cert_subj, host, COUNT(events.jobid) from events,short_fields inner join users on events.userid=users.userid where events.event=short_fields.event and code='4' and time_stamp >'" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' and events.jobid=short_fields.jobid and events.event=short_fields.event and  prog='NetworkServer' and name='RESULT' and value='OK' group by users.cert_subj,host;"
   logger.info('Query is : ' + querystr)
   db.query(querystr)
   r = db.store_result()
   if r:
      for i in range(1,r.num_rows() + 1):
        row = r.fetch_row()
        if row:
          dn = row[0][0]
          rowhost = row[0][1]
          rowWM_in = row[0][2]

          put_into_wmsdata(wmsdata_list,rowhost,dn,['WM_in'],[rowWM_in])

   # Run a MySQL query to find the number of both collection and single jobs enqueued to WM in a given time interval from LogMonitor (i.e. resubmitted)
   logger.info('Run a MySQL query to find the number of both collection and single jobs enqueued to WM in a given time interval from LogMonitor (i.e. resubmitted) PER USER and PER WMS')
   querystr="select users.cert_subj,host,COUNT(DISTINCT(events.jobid)) from events,short_fields inner join users on events.userid=users.userid where code='4' and time_stamp >'" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' and events.jobid=short_fields.jobid and events.event=short_fields.event and name='RESULT' and value='OK' and prog='LogMonitor' group by users.cert_subj, host;"
   logger.info('Query is : ' + querystr)   
   db.query(querystr)
   r = db.store_result()
   # Iterate through the result set
   if r:
      for i in range(1,r.num_rows() + 1):
        row = r.fetch_row()
        if row:
          usernew = row[0][0]
          index = row[0][0].find('/CN=proxy/CN=proxy')
          if index != -1:
             usernew=row[0][0][0:index]
          dn = usernew
          rowhost = row[0][1]
          rowWM_in_res = row[0][2]
          
          put_into_wmsdata(wmsdata_list,rowhost,dn,['WM_in_res'],[rowWM_in_res])

   # Run a MySQL query to find the number of single jobs enqueued to the Job Controller from WM in a given time interval PER USER and PER WMS
   logger.info('Run a MySQL query to find the number of single jobs enqueued to the Job Controller from WM in a given time interval PER USER and PER WMS')
   querystr="select users.cert_subj,host,COUNT(DISTINCT(events.jobid)) from events,short_fields inner join users on events.userid=users.userid where code='4' and time_stamp >'" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' and events.jobid=short_fields.jobid and events.event=short_fields.event and name='RESULT' and value='OK' and prog='WorkloadManager' group by users.cert_subj,host;"
   logger.info('Query is : ' + querystr)   
   db.query(querystr)
   r = db.store_result()
   # Iterate through the result set
   if r:
      for i in range(1,r.num_rows() + 1):
        row = r.fetch_row()
        if row:
          usernew = row[0][0]
          index = row[0][0].find('/CN=proxy/CN=proxy')
          if index != -1:
             usernew=row[0][0][0:index]
          dn = usernew
          rowhost = row[0][1]
          rowJC_in = row[0][2]

          put_into_wmsdata(wmsdata_list,rowhost,dn,['JC_in'],[rowJC_in])

   # Run a MySQL query to find the number of single jobs enqueued to Condor from the Job Controller in a given time interval PER USER and PER WMS
   logger.info('Run a MySQL query to find the number of single jobs enqueued to Condor from the Job Controller in a given time interval PER USER and PER WMS')
   querystr="select users.cert_subj,host,COUNT(DISTINCT(events.jobid)) from events,short_fields inner join users on events.userid=users.userid where code='1' and time_stamp >'" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' and events.jobid=short_fields.jobid and events.event=short_fields.event and name='RESULT' and value='OK' and prog='JobController' group by users.cert_subj,host;"
   logger.info('Query is : ' + querystr)   
   db.query(querystr)
   r = db.store_result()
   # Iterate through the result set
   if r:
      for i in range(1,r.num_rows() + 1):
        row = r.fetch_row()
        if row:
          usernew = row[0][0]
          index = row[0][0].find('/CN=proxy/CN=proxy')
          if index != -1:
             usernew=row[0][0][0:index]
          dn = usernew
          rowhost = row[0][1]
          rowJC_out = row[0][2]

          put_into_wmsdata(wmsdata_list,rowhost,dn,['JC_out'],[rowJC_out])

  # Run a MySQL query to find the number of jobs done in a given time interval PER USER and PER WMS
   logger.info('Run a MySQL query to find the number of single jobs done successfully in a given time interval PER USER and PER WMS')
   querystr="select users.cert_subj,host,COUNT(DISTINCT(events.jobid)) from events,short_fields inner join users on events.userid=users.userid where events.jobid=short_fields.jobid and code='10' and time_stamp >'" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' and prog='LogMonitor' and name='REASON' and (value='Job terminated successfully' or value='Job Terminated Successfully') group by users.cert_subj,host;"
   logger.info('Query is : ' + querystr)
   db.query(querystr)
   r = db.store_result()
   # Iterate through the result set
   if r:
      for i in range(1,r.num_rows() + 1):
        row = r.fetch_row()
        if row:
          usernew = row[0][0]
          index = row[0][0].find('/CN=proxy/CN=proxy')
          if index != -1:
             usernew=row[0][0][0:index]
          dn = usernew
          rowhost = row[0][1]
          rowJOB_DONE = row[0][2]

          put_into_wmsdata(wmsdata_list,rowhost,dn,['JOB_DONE'],[rowJOB_DONE])

  # Run a MySQL query to find the number of jobs aborted in a given time interval PER USER and PER WMS
   logger.info('Run a MySQL query to find the number of single jobs aborted in a given time interval PER USER and PER WMS')
   querystr="select users.cert_subj,host,COUNT(DISTINCT(events.jobid)) from events inner join users on events.userid=users.userid where code='12' and time_stamp >'" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' group by users.cert_subj,host;"

   logger.info('Query is : ' + querystr)
   db.query(querystr)
   r = db.store_result()
   # Iterate through the result set
   if r:
      for i in range(1,r.num_rows() + 1):
        row = r.fetch_row()
        if row:
          usernew = row[0][0]
          index = row[0][0].find('/CN=proxy/CN=proxy')
          if index != -1:
             usernew=row[0][0][0:index]
          dn = usernew
          rowhost = row[0][1]
          rowJOB_ABORTED = row[0][2]

          put_into_wmsdata(wmsdata_list,rowhost,dn,['JOB_ABORTED'],[rowJOB_ABORTED])

# Run a MySQL query to find the DEST_CE of jobs in a given time interval PER WMS
   logger.info('Run a MySQL query to find  DEST_CE of jobs in a given time interval PER WMS')

##### old ce query - this double-counts CEs for jobs landed onto a CREAM CE

   #querystr="select value, host, COUNT(value) from (select DISTINCT(short_fields.event),events.jobid, short_fields.value, host from events,short_fields where events.jobid=short_fields.jobid  and time_stamp >'" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' and prog='WorkloadManager' and name='DEST_HOST' and value!='localhost' and value!='unavailable' and code='15') as temp group by value, host;"
##################################################

##### New query that avoids double-counting CEs for jobs landed onto a CREAM CE
   querystr="select value,host,  count(value) from (select distinct(short_fields.jobid), value, host from short_fields inner join events where events.code='15' and events.prog = 'WorkloadManager' and name='DEST_HOST' and time_stamp > '" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' and value!='localhost' and value!='unavailable' and events.jobid=short_fields.jobid) as temp group by value, host;"
##################################################

   logger.info('Query is : ' + querystr)
   db.query(querystr)
   r = db.store_result()
   # Iterate through the result set
   if r:
      for i in range(1,r.num_rows() + 1):
         row = r.fetch_row()
         if row:
            rowCE      = row[0][0]
            rowhost    = row[0][1]
            rowCEcount = row[0][2]
            wmsFOUND = False
            for wmsdata in wmsdata_list:
               if wmsdata.host == rowhost:
                  wmsFOUND = True
                  try:
                     wmsdata.add_ce(rowCE)
                     wmsdata.add_ce_count(rowCE,rowCEcount)
                   except wmsdata_class.CEPresent:
#                   logger.warning('CE already present in wmsdata for host: ' + wmsdata.host)
                     wmsdata.add_ce_count(rowCE, rowCEcount)
            if not wmsFOUND:
               wmsdata = wmsdata_class.wmsdata(rowhost)
               wmsdata.add_ce(rowCE)
               wmsdata.add_ce_count(rowCE,rowCEcount)
               wmsdata_list.append(wmsdata)



# Run a MySQL query to find the LB used to store the jobs in a given time interval 
# Available only if DBTYPE = LBPROXY
 
   if DBTYPE == 'LBPROXY':
      logger.info('Run a MySQL query to find the LB used to store the jobs in a given time interval')
      querystr="select distinct dg_jobid from jobs inner join events on jobs.jobid=events.jobid where events.code = '17' and time_stamp > '" + STARTDATE + "' and time_stamp < '" + ENDDATE + "';"
      logger.info('Query is : ' + querystr)
      db.query(querystr)
      r = db.store_result()
   # Iterate through the result set
      if r:
         for i in range(1,r.num_rows() + 1):
            row = r.fetch_row()
            if row:
               rowLB      = row[0][0]
               LBstr = rowLB[rowLB.find('//') + 2 : rowLB.find(':9000')]
               for wmsdata in wmsdata_list:
                  wmsdata.add_lb(LBstr)

   db.close()
  
#   filename= confvar['INSTALL_PATH'] +'/sensors//tmp/USERSTATS_' +  lbhost + '_' + wmshost + '.txt'

#   fileusersstats = open(filename,'w')
#   fileusersstats.write('START OF FILE\n')
#   for i in range(0,len(users_stats)):
#      fileusersstats.write(str(users_stats[i][0]) + '|' + str(users_stats[i][1]) + '|' + str(users_stats[i][2]) + '|' + str(users_stats[i][3]) + '|' + str(users_stats[i][4]) + '|' + str(users_stats[i][5]) + '|' + str(users_stats[i][6]) + '|' + str(users_stats[i][7]) + '|' + str(users_stats[i][8]) + '|\n')

#   fileusersstats.write('END OF FILE\n')
#   fileusersstats.close()

   return wmsdata_list
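
All of the queries above splice STARTDATE and ENDDATE directly into the SQL text. With the high-level MySQLdb API the same first query can take the time bounds as bind parameters instead; a minimal sketch, where count_single_jobs is a hypothetical helper and the configuration keys are those read at the top of the function:

import MySQLdb

def count_single_jobs(conf, startdate, enddate):
    # Same per-user/per-WMS single-job count as the first query above,
    # with the time bounds passed as bind parameters rather than spliced text.
    conn = MySQLdb.connect(host=conf['LB_DB_HOST'], user=conf['LB_DB_USER'],
                           db=conf['LB_DB_NAME'],
                           passwd=conf['SERVER_MYSQL_PASSWORD'][1:-1])
    try:
        cur = conn.cursor()
        cur.execute(
            "select users.cert_subj, host, COUNT(DISTINCT(events.jobid)) "
            "from events, short_fields inner join users on events.userid=users.userid "
            "where events.event=short_fields.event and code='17' "
            "and time_stamp > %s and time_stamp <= %s "
            "and events.jobid=short_fields.jobid and name='NSUBJOBS' and value='0' "
            "group by users.cert_subj, host", (startdate, enddate))
        return cur.fetchall()
    finally:
        conn.close()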
Example #9
def wms_sensor():
	'''wms_sensor() -> list of strings in the following order:
        running  - running condor job as reported by condor_q
        idle     - idle condor job as reported by condor_q
        current  - current condor job as reported by condor_q
        load     - machine load 15 as reported /proc/loadavg
        input_fl - unprocessed entries in input.fl
        queue_fl - unprocessed entries in queue.fl
        dg20     - number of dg20logd files in /var/log/glite
        ism_size - ism size in 1kB blocks
        ism_entries - CE ism entries
        sandbox  - Sandbox partition occupancy (in %)
        tmp      - tmp partition occupancy (in %)
        gftp     - number of gftp process
        FD_WM    - number of file descriptors opened by WM
        FD_LM    - number of file descriptors opened by LM
        FD_JC    - number of file descriptors opened by JC
        FD_LL    - number of file descriptors opened by LL
        LB       - status of LB daemon (0 is ok)
        LL       - status of LL daemon (0 is ok)
        LBPX     - status of LBPX daemon (0 is ok)
        PX       - status of PX daemon (0 is ok)
        FTPD     - status of FTPD daemon (0 is ok)
        JC       - status of JC daemon (0 is ok)
        LM       - status of LM daemon (0 is ok)
        WM       - status of WM daemon (0 is ok)
        WMP      - status of WMP daemon (0 is ok)
        ICE      - status of ICE daemon (0 is ok)
        BDII      - status of BDII daemon (0 is ok)
        NTPD      - status of NTPD daemon (0 is ok)
        varlog   - /var/log partition occupancy (in %)
        varlibmysql - /var/lib/mysql partition occupancy (in %)
	'''

	import os, commands, sys, fpformat
        sys.path.append('../../common')
        sys.path.append('../../common/classes')
	#import MySQLdb
	import time
	import readconf_func
        import socket
	#Sensor functions import
	import condor_func
	import load_func
	import dg20_func
	import ism_stat_func
	import filelists_func
	import diskspace_checks_func
	import gftp_num_func
	import file_desc_func
	import daemons_status_func
	import wms_balancing_metric_func      
        import ice_jobs_func
        import wms_class
 
	#Initializing logger
        import logging
        logger = logging.getLogger('wms_sensor')
	
	confvar = readconf_func.readconf();

	# Starting Calling sensor functions....
####################################################################################
#### starting background processes; at the end we will check whether they finished
####################################################################################

     ## NO LONGER NEEDED
        #Launching in background the creation of the mapping table
        #cmd = confvar['INSTALL_PATH'] + "/sensors/bin/wms_usermapping/wms_usermapping_func &"
        #os.system(cmd)
     #######################

        #Launching the creation of the CE_MM file in background
        cmd = confvar['INSTALL_PATH'] + "/sensors/bin/CE_MM.sh " + confvar['WORKLOAD_MANAGER_LOG_FILE'] + " " + confvar['CE_MM_FILE'] + " &"
        os.system(cmd)



######################################################################################
######################################################################################

	# The condor_jobs first...
	logger.info('Calling condor_jobs function')
	#Return a list of total, idle, running, held jobs as reported by condor_q

	condor_list = condor_func.condor_jobs(confvar.get('ENV_FILE'))
	if condor_list[2] != None:
	   running = condor_list[2]
	else:
	   running = 'Null'
	if condor_list[0] != None:
	   current = condor_list[0]
	else:
	   current = 'Null'
	if condor_list[1] != None:
	   idle = condor_list[1]
	else:
	   idle = 'Null'


        # ...Then the ice_jobs ...
        logger.info('Calling ice_jobs function')
        #Return a list of total, idle, running, held jobs as reported by icedb tool
        ice_dict = ice_jobs_func.ice_jobs(confvar['ENV_FILE'])

	# ....Then the average cpu load  in past 15 min
        logger.info('Calling load function')
	loadtmp=load_func.load_cpu()
	if loadtmp != None:
	   load = loadtmp
	else:
	   load = 'Null'

	# Number of jobs in Input.fl and Queue.fl and ice.fl
        logger.info('Calling filelists function')
	filelist_tmp = filelists_func.filelists(confvar.get('ENV_FILE'))
        #print "filelists_tmp = " + str(filelist_tmp)
	if filelist_tmp[0] != None:
	   input_fl = filelist_tmp[0][0:len(filelist_tmp[0]) - 1]
	else:
	   input_fl = 'Null'
	if filelist_tmp[1] != None:
	   queue_fl = filelist_tmp[1][0:len(filelist_tmp[1]) - 1]
	else:
	   queue_fl = 'Null'
        if filelist_tmp[2] != None:
           ice_fl = filelist_tmp[2][0:len(filelist_tmp[2]) - 1]
        else:
           ice_fl = 'Null'

	# ....Then the number of dg20 files in the wms
	logger.info('Calling dg20log function')
	dg20 = dg20_func.dg20log( confvar.get('DG20_PATH'))
	if dg20 == None:
	   dg20 = 'Null'

        #...Then the ism status
        logger.info('Calling ism_stat function')
        ism_tmp = ism_stat_func.ism_stat(confvar.get('ISMDUMP_PATH'),confvar.get('GLITE_LOG_DIR'))
        if ism_tmp[0] == None:
           ism_tmp[0] = 'Null'
        if ism_tmp[1] == None:
           ism_tmp[1] = 'Null'
        ism_size = ism_tmp[0]
        ism_entries = ism_tmp[1]


	# % occupancy of the disks hosting the Sandbox and tmp directories
	logger.info("Calling diskspace_checks function")
	output_tmp=diskspace_checks_func.diskspace_checks(confvar.get('SANDBOX_PATH'),confvar.get('TMP_PATH'),confvar.get('VAR_LOG_PATH'),confvar.get('VAR_LIB_MYSQL_PATH'))
	if output_tmp[0] != None:
	   sandbox = output_tmp[0][0:len(output_tmp[0])]
	else:
	   sandbox = 'Null'
	if output_tmp[1] != None:
	   tmp = output_tmp[1][0:len(output_tmp[1])]
	else:
	   tmp = 'Null'
	if output_tmp[2] != None:
	   varlog = output_tmp[2][0:len(output_tmp[2])]
	else:
	   varlog = 'Null'
	if output_tmp[3] != None:
	   varlibmysql = output_tmp[3][0:len(output_tmp[3])]
	else:
	   varlibmysql = 'Null'


	# ....Then the number of gridftp sessions on the wms
	logger.info("Calling gftp_num function")
	# call the sensor once so the check and the stored value cannot disagree
	gftp_tmp = gftp_num_func.gftp_num()
	if gftp_tmp != None:
	   gftp = gftp_tmp
	else:
	   gftp = 'Null'

	#.... Then file descriptors for  WM,LM,JC,LL
	logger.info("Calling file descriptor function")
	output_tmp=file_desc_func.file_desc(confvar.get('FD_WMS_WM'),confvar.get('FD_WMS_LM'),confvar.get('FD_WMS_JC'),confvar.get('FD_WMS_LBINTERLOG'))
	if output_tmp[0] != None:
	   FD_WM = output_tmp[0]
	else:
	   FD_WM = 'Null'
	if output_tmp[1] != None:
	   FD_LM = output_tmp[1]
	else:
	   FD_LM = 'Null'
	if output_tmp[2] != None:
	   FD_JC = output_tmp[2]
	else:
	   FD_JC = 'Null'
	if output_tmp[3] != None:
	   FD_LL = output_tmp[3]
	else:
	   FD_LL = 'Null'

	#.... Then checking wms daemons status for 'glite-lb-bkserverd','glite-lb-locallogger','glite-lb-proxy',
	#       'glite-proxy-renewald','glite-wms-ftpd','glite-wms-jc',
	#       'glite-wms-lm','glite-wms-wm','glite-wms-wmproxy'
	logger.info("Calling daemons status check function")
	output_tmp=daemons_status_func.daemons_status(confvar.get('GLITE_DAEMONS_PATH'))
#        print 'daemons:', output_tmp, '\n'
#        print confvar.get('GLITE_DAEMONS_PATH')
	if output_tmp[0]!=None:
	   LL = output_tmp[0]
	else:
	   LL = 'Null'
	if output_tmp[1]!=None:
	   LBPX = output_tmp[1]
	else:
	   LBPX = 'Null'
	if output_tmp[2]!=None:
	   PX = output_tmp[2]
	else:
	   PX = 'Null'
	if output_tmp[3]!=None:
	   FTPD = output_tmp[3]
	else:
	   FTPD = 'Null'
	if output_tmp[4]!=None:
	   JC = output_tmp[4]
	else:
	   JC = 'Null'
	if output_tmp[5]!=None:
	   LM = output_tmp[5]
	else:
	   LM = 'Null'
	if output_tmp[6]!=None:
	   WM = output_tmp[6]
	else:
	   WM = 'Null'
	if output_tmp[7]!=None:
	   WMP = output_tmp[7]
	else:
	   WMP = 'Null'
	if output_tmp[8] != None:
	   ICE = output_tmp[8]
	else:
	   ICE = 'Null'
	if output_tmp[9] != None:
	   BDII = output_tmp[9]
	else:
	   BDII = 'Null'
	if output_tmp[10] != None:
	   NTPD = output_tmp[10]
	else:
	   NTPD = 'Null'

	#Logging fields

# Now we create the WMS object
        hostname = socket.getfqdn()
        WMS = wms_class.WMS(hostname)
 
        WMS['condor_running'] = str(running)
        WMS['condor_idle'] = str(idle)
        WMS['condor_current'] = str(current)
        WMS['ice_idle'] = str(ice_dict['IDLE'])
        WMS['ice_pending'] = str(ice_dict['PENDING'])
        WMS['ice_running'] = str(ice_dict['RUNNING'])
        WMS['ice_held'] = str(ice_dict['HELD'])
        WMS['cpu_load'] = str(load)
        WMS['wm_queue'] = str(input_fl)
        WMS['jc_queue'] = str(queue_fl)
        WMS['ice_queue'] = str(ice_fl)
        WMS['ism_size'] = str(ism_size)
        WMS['ism_entries'] = str(ism_entries)
        WMS['disk_sandbox'] = str(sandbox)
        WMS['disk_tmp'] = str(tmp)
        WMS['disk_varlog'] = str(varlog)
        WMS['disk_varlibmysql'] = str(varlibmysql)
        WMS['gftp_con'] = str(gftp)
        WMS['lb_event'] = dg20
        WMS['FD_WM'] = str(FD_WM)
        WMS['FD_LM'] = str(FD_LM)
        WMS['FD_JC'] = str(FD_JC)
        WMS['FD_LL'] = str(FD_LL)
        #WMS['LB'] = str(LB) # removed in 3.0
        #WMS.daemons_dict['LB'] = WMS['LB']   # removed in 3.0
        WMS['daemon_LL'] = str(LL)
        WMS.daemons_dict['daemon_LL'] = WMS['daemon_LL']
        WMS['daemon_LBPX'] = str(LBPX)
        WMS.daemons_dict['daemon_LBPX'] = WMS['daemon_LBPX']
        WMS['daemon_PX'] = str(PX)
        WMS.daemons_dict['daemon_PX'] = WMS['daemon_PX']
        WMS['daemon_FTPD'] = str(FTPD)
        WMS.daemons_dict['daemon_FTPD'] = WMS['daemon_FTPD']
        WMS['daemon_JC'] = str(JC)
        WMS.daemons_dict['daemon_JC'] = WMS['daemon_JC']
        WMS['daemon_LM'] = str(LM)
        WMS.daemons_dict['daemon_LM'] = WMS['daemon_LM']
        WMS['daemon_WM'] = str(WM)
        WMS.daemons_dict['daemon_WM'] = WMS['daemon_WM']
        WMS['daemon_WMP'] = str(WMP)
        WMS.daemons_dict['daemon_WMP'] = WMS['daemon_WMP']
        WMS['daemon_ICE'] = str(ICE)
        WMS.daemons_dict['daemon_ICE'] = WMS['daemon_ICE']
        WMS['daemon_BDII'] = str(BDII)
        WMS.daemons_dict['daemon_BDII'] = WMS['daemon_BDII']
        WMS['daemon_NTPD'] = str(NTPD)
        WMS.daemons_dict['daemon_NTPD'] = WMS['daemon_NTPD']

        logger.info('Calling wms_balancing_metric_func')
        metric_output = wms_balancing_metric_func.wms_balancing_metric(WMS)
       
        logger.debug("WMS values collected are:")
        logger.debug(str(WMS))
         
        
        # Before returning, check whether the background CE_MM job has finished.
        # If not, wait for at most LOOP_TIMEOUT seconds.
        # (the wms_usermapping wait is gone along with its launch above)
        file_tmp2 = confvar['CE_MM_FILE']
        MM_DONE = False
        LOOP_TIMEOUT = int( confvar['LOOP_TIMEOUT'] )
        START_LOOP_TIME = time.time()
        EXIT_THE_LOOP = False
        while EXIT_THE_LOOP == False and (time.time() - START_LOOP_TIME) < LOOP_TIMEOUT:
           # lsof exits with status 1 when no process has the file open,
           # i.e. the background writer has finished with it
           if (os.access(file_tmp2,os.F_OK) == True) and (os.WEXITSTATUS(os.system(("/usr/sbin/lsof " + file_tmp2 + " >/dev/null  2>&1"))) == 1):
                  #good: the file exists and is no longer being written
                  MM_DONE = True
                  logger.info("CE_MM completed. Returning")
           EXIT_THE_LOOP = MM_DONE
           if not EXIT_THE_LOOP:
              time.sleep(1)   # avoid busy-looping on lsof

        if MM_DONE == False:
           logger.warning("CE_MM did not complete within the timeout.")

# now we return the wms object and whether the CE_MM file is ready
        return WMS , MM_DONE
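A minimal usage sketch for the sensor above (not part of WMSMonitor): it assumes only what the function itself shows, namely that wms_sensor() returns a dict-like WMS object whose daemons_dict maps daemon names to status strings ('0' meaning OK), plus the CE_MM readiness flag.

import logging
logger = logging.getLogger('wms_sensor_driver')

def collect_once():
    # Hypothetical driver: poll the sensor once and flag any daemon not reporting OK.
    WMS, mm_done = wms_sensor()
    for name, status in WMS.daemons_dict.items():
        if status not in ('0', 'Null'):
            logger.warning(name + ' reported status ' + str(status))
    if not mm_done:
        logger.warning('CE_MM file not ready; CE match data may be stale')
    return WMS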
Ejemplo n.º 13
0
def lb_query(rowhost,STARTDATE,ENDDATE):

   #Initializing logger
   import logging
   logger = logging.getLogger('lb_apiquery')

################ INITIALIZATION ######
   # modules used below (readconf_func and wmsdata_class are WMSMonitor's own)
   import os
   import readconf_func
   import wmsdata_class

   confvar = readconf_func.readconf()
   API_CMD_PATH = './'
   wmsdata_list = []
   users_stats = []

#######################################

#################  FUNCTION DEFINITION #########
   def put_into_wmsdata(wmsdata_list,wmshostname,userdn,fieldlist,valuelist):
      wmsFOUND = False
      for wmsdata in wmsdata_list:
         if wmsdata.host == wmshostname:
            wmsFOUND = True
            try:
               wmsdata.add_user(userdn)
            except wmsdata_class.UserPresent:
               pass
            for field in fieldlist:
               wmsdata[userdn][field] = valuelist[fieldlist.index(field)]
      if not wmsFOUND:
         wmsdata = wmsdata_class.wmsdata(wmshostname)
         wmsdata.add_user(userdn)
         for field in fieldlist:
            wmsdata[userdn][field] = valuelist[fieldlist.index(field)]
         wmsdata_list.append(wmsdata)

   def group_by_key(api_output_list,keyposition,CNPROXYFLAG):
      #INPUTS:
      # - api_output_list: the output of the api query command execution
      # - keyposition: the position of the grouping key (e.g. user DN or CE queue) in the output_list lines split by the separator
      # - set CNPROXYFLAG to TRUE/1 to group DNs which differ only by a "/CN=proxy/CN=proxy" suffix
      #OUTPUTS:
      # - dictionary mapping each key to its count of occurrences
      dictionary={}
      l_key=[]
      for l in api_output_list:
         l_key.append(l.split('\t')[keyposition])
      for key in set(l_key):
         dictionary[key]=l_key.count(key)
      if CNPROXYFLAG:
         #grouping users and proxies
         for key in dictionary.keys():
            index = key.find('/CN=proxy/CN=proxy')
            if index != -1:
               dn = key[0:index]
               if dictionary.has_key(dn):
                  dictionary[dn]= dictionary[dn] + dictionary.pop(key)
               else:
                  dictionary[dn]= dictionary.pop(key)
      return dictionary
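   # Worked example for group_by_key (illustrative DNs): given the lines
   #   "dnA\t...", "dnA/CN=proxy/CN=proxy\t...", "dnB\t..."
   # keyposition=0 and CNPROXYFLAG=1 collapse the two dnA variants into one
   # entry, yielding {'dnA': 2, 'dnB': 1}.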

   def resolve_jobuser(jobid):
      #INPUTS:
      # - jobid for which we want to derive the user
      #OUTPUTS:
      # - job USER DN, or 'Null' if it cannot be determined
      # N.B. it exploits the lbproxy socket if available
      import os.path
      if os.path.exists('/tmp/lb_proxy_serve.sock'):
         stream = os.popen("./job_status -x /tmp/lb_proxy_serve.sock " + jobid + " |grep owner")
         output = stream.readlines()
         if output:
            return output[0].split(':')[1]
      return 'Null'
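   # Illustrative job_status output line (made-up DN):
   #   owner : /DC=org/DC=example/CN=Jane Doe
   # splitting on ':' and taking field 1 recovers the DN; a DN that itself
   # contained ':' would be truncated by this split.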

   def checkoutput_to_resolve_jobuser(apiqueryoutput):
      #INPUTS:
      # - output lines from apiquery
      #OUTPUTS:
      # - the same lines, with any (null) owner field replaced by the job USER DN
      # N.B. it exploits the lbproxy socket if available
      import os.path
      if os.path.exists('/tmp/lb_proxy_serve.sock'):
         logger.debug('entering checkoutput_to_resolve_jobuser function')
         out = apiqueryoutput
         for iji in range(0, len(out)):
            if out[iji].split('\t')[0].find('(null)') != -1:
               logger.debug('found (null) DN, for jobid:' + out[iji].split('\t')[1])
               user = resolve_jobuser(out[iji].split('\t')[1])
               logger.debug('substituted with:' + user)
               user = user.strip().strip('\n').lstrip().strip()
               tmp = out[iji].replace('(null)', user, 1)
               logger.debug('new line tmp ' + tmp)
               out[iji] = tmp
               logger.debug('new line apioutput ' + out[iji])
         return out
      else:
         logger.warning("NO lb-proxy-socket-file found, unable to determine some jobs' OWNER field")
         return apiqueryoutput


############################################
########## STARTING QUERIES ################

   # Run a query to find the number of jobs and collections submitted in a given time interval PER USER
   logger.info('Running a query to find the number of jobs submitted in a given time interval PER USER')
   stream= os.popen(API_CMD_PATH + "/submitted_jobs " + STARTDATE + " " + ENDDATE)
   output=stream.readlines()
   if output:
      #checking jobs with null owner
      output=checkoutput_to_resolve_jobuser(output)
      l_single=[]
      l_collection_user=[]
      l_collection_values=[]
      #SEPARATING SINGLE JOBS FROM COLLECTIONS
      for l1 in output:
         if l1.split('\t')[2]=='0':
            l_single.append(l1)
         else:
            l_collection_user.append(l1.split('\t')[0])
            l_collection_values.append(l1.split('\t')[2])            
      dict_tmp=group_by_key(l_single,0,1)     
      #STORING SINGLE JOBS DATA       
      for dn in dict_tmp.keys():
         put_into_wmsdata(wmsdata_list,rowhost,dn,['WMP_in'],[dict_tmp[dn]])          
      
      #EXTRACTING COLLECTIONS DATA, GROUPING SAME USERS DN & PROXY
      dict_tmp={}
      for user in set(l_collection_user):
         values=[]
         for count in range(0,len(l_collection_user)):
            if l_collection_user[count]==user:
               values.append(int(l_collection_values[count]))
         #GROUPING DN AND PROXY OF SAME USER
         index = user.find('/CN=proxy/CN=proxy')
         if index != -1:
            #CASE with PROXY
            dn = user[0:index]
            if dict_tmp.has_key(dn):
               dict_tmp[dn][0]= dict_tmp[dn][0] + len(values)
               dict_tmp[dn][1]= dict_tmp[dn][1] + sum(values)
               dict_tmp[dn][2]= min(dict_tmp[dn][2], min(values))
               dict_tmp[dn][3]= max(dict_tmp[dn][3], max(values))
            else:
               dict_tmp[dn]= [len(values),sum(values),min(values),max(values)]
         else:
            #CASE without PROXY : checking whether the same user was already inserted as proxy
            if dict_tmp.has_key(user):
               dict_tmp[user][0]= dict_tmp[user][0] + len(values)
               dict_tmp[user][1]= dict_tmp[user][1] + sum(values)
               dict_tmp[user][2]= min(dict_tmp[user][2], min(values))
               dict_tmp[user][3]= max(dict_tmp[user][3], max(values))
            else:
               dict_tmp[user]= [len(values),sum(values),min(values),max(values)]
      #STORING COLLECTIONS DATA
      for dn in dict_tmp.keys():
         put_into_wmsdata(wmsdata_list,rowhost,dn,['WMP_in_col','WMP_in_col_nodes','WMP_in_col_min_nodes','WMP_in_col_max_nodes'],dict_tmp[dn])

      #EXPLOITING REGISTER EVENT JOBS TO EXTRACT THE SET OF LB SERVERS USED BY THE CONSIDERED WMS HOST
      #Notice that in the LBPROXY CASE JUST 1 WMSHOST is in wmsdata_list. We keep the list as legacy...
      for wmsdata in wmsdata_list:
         for l in output:
            # the jobid looks like https://<lbhost>:9000/<unique>; keep only the LB host part
            wmsdata.add_lb(l.split('\t')[1].split('/')[2].split(':')[0])
                
   #  Run a query to find PER USER and PER WMS the number of jobs enqueued to WM from WMP in a given time interval
   logger.info("Run a query to find PER USER and PER WMS the number of jobs enqueued to WM from WMP in a given time interval")
   stream= os.popen(API_CMD_PATH + "/enqueued_WM_jobs " + STARTDATE + " " + ENDDATE)
   output=stream.readlines()
   if output:
      #checking jobs with null owner
      output=checkoutput_to_resolve_jobuser(output)
      dict_tmp=group_by_key(output,0,1)
      for dn in dict_tmp.keys():
          put_into_wmsdata(wmsdata_list,rowhost,dn,['WM_in'],[dict_tmp[dn]])

   # Run a query to find the number of both collection and single jobs enqueued to WM in a given time interval from the LogMonitor (i.e. resubmitted)
   logger.info('Run a query to find the number of both collection and single jobs enqueued to WM in a given time interval from the LogMonitor (i.e. resubmitted) PER USER and PER WMS')
   stream= os.popen(API_CMD_PATH + "/resubmitted_WM_jobs " + STARTDATE + " " + ENDDATE)
   output=stream.readlines()
   if output:
      #checking jobs with null owner
      output=checkoutput_to_resolve_jobuser(output)
      dict_tmp=group_by_key(output,0,1)
      for dn in dict_tmp.keys():
         put_into_wmsdata(wmsdata_list,rowhost,dn,['WM_in_res'],[dict_tmp[dn]])

   # Run a query to find the number of single jobs enqueued to the Job Controller from WM in a given time interval PER WMS and PER USER
   logger.info('Run a query to find the number of single jobs enqueued to the Job Controller from WM in a given time interval PER USER and PER WMS')
   stream= os.popen(API_CMD_PATH + "/enqueued_JSS_jobs " + STARTDATE + " " + ENDDATE)
   output=stream.readlines()
   if output:
      #checking jobs with null owner
      output=checkoutput_to_resolve_jobuser(output)
      dict_tmp=group_by_key(output,0,1)
      for dn in dict_tmp.keys():
         put_into_wmsdata(wmsdata_list,rowhost,dn,['JC_in'],[dict_tmp[dn]])


   # Run a query to find the number of single jobs enqueued to Condor from the Job Controller in a given time interval PER USER and PER WMS
   logger.info('Run a query to find the number of single jobs enqueued to Condor from the Job Controller in a given time interval PER USER and PER WMS')
   stream= os.popen(API_CMD_PATH + "/transfer_CONDOR_jobs " + STARTDATE + " " + ENDDATE)
   output=stream.readlines()
   if output:
      #checking jobs with null owner
      output=checkoutput_to_resolve_jobuser(output)
      dict_tmp=group_by_key(output,0,1)
      for dn in dict_tmp.keys():
          put_into_wmsdata(wmsdata_list,rowhost,dn,['JC_out'],[dict_tmp[dn]])

   # Run a query to find the number of jobs done in a given time interval PER USER and PER WMS
   logger.info('Run a query to find the number of single jobs done successfully in a given time interval PER USER and PER WMS')
   stream= os.popen(API_CMD_PATH + "/done_events " + STARTDATE + " " + ENDDATE)
   output=stream.readlines()
   if output:
      #checking jobs with null owner
      output=checkoutput_to_resolve_jobuser(output)
      dict_tmp=group_by_key(output,0,1)
      for dn in dict_tmp.keys():
          put_into_wmsdata(wmsdata_list,rowhost,dn,['JOB_DONE'],[dict_tmp[dn]])

   # Run a query to find the number of jobs aborted in a given time interval PER USER and PER WMS
   logger.info('Run a query to find the number of single jobs aborted in a given time interval PER USER and PER WMS')
   stream= os.popen(API_CMD_PATH + "/abort_events " + STARTDATE + " " + ENDDATE)
   output=stream.readlines()
   if output:
      #checking jobs with null owner
      output=checkoutput_to_resolve_jobuser(output)
      dict_tmp=group_by_key(output,0,1)
      for dn in dict_tmp.keys():
          put_into_wmsdata(wmsdata_list,rowhost,dn,['JOB_ABORTED'],[dict_tmp[dn]])

   # Run a query to find the DEST_CE of jobs in a given time interval PER WMS
   logger.info('Run a query to find the DEST_CE of jobs in a given time interval PER WMS')
   stream= os.popen(API_CMD_PATH + "/CE_histogram " + STARTDATE + " " + ENDDATE)
   output=stream.readlines()
   if output:
      #checking jobs with null owner
      output=checkoutput_to_resolve_jobuser(output)
      dict_tmp=group_by_key(output,2,0)
      for CE in dict_tmp.keys():
            rowCE      = CE
            rowCEcount = dict_tmp[CE]
            wmsFOUND = False
            for wmsdata in wmsdata_list:
               if wmsdata.host == rowhost:
                  wmsFOUND = True
                  try:
                     wmsdata.add_ce(rowCE)
                     wmsdata.add_ce_count(rowCE,rowCEcount)
                  except wmsdata_class.CEPresent:
                     # the CE is already known for this host: still accumulate its count
                     wmsdata.add_ce_count(rowCE,rowCEcount)
            if not wmsFOUND:
               wmsdata = wmsdata_class.wmsdata(rowhost)
               wmsdata.add_ce(rowCE)
               wmsdata.add_ce_count(rowCE,rowCEcount)
               wmsdata_list.append(wmsdata)

   return wmsdata_list
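A sketch of how a wrapper might drive this API-based variant of lb_query; the host name and the 'YYYY-MM-DD HH:MM:SS' timestamp format are assumptions (the format only has to match what the LB API commands expect), and only the wmsdata.host attribute already used above is relied upon.

import time

def query_last_hour(wmshost):
    # Hypothetical driver: query the last hour of LB events for one WMS host.
    fmt = '%Y-%m-%d %H:%M:%S'
    end = time.strftime(fmt, time.localtime())
    start = time.strftime(fmt, time.localtime(time.time() - 3600))
    wmsdata_list = lb_query(wmshost, start, end)
    for wmsdata in wmsdata_list:
        print 'collected LB data for ' + wmsdata.host
    return wmsdata_list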
Ejemplo n.º 14
0
def lb_query(lbhost, STARTDATE, ENDDATE, DBTYPE):

    #Initializing logger
    import logging
    logger = logging.getLogger('lb_query')

    # modules used below (readconf_func and wmsdata_class are WMSMonitor's own)
    import MySQLdb
    import readconf_func
    import wmsdata_class

    confvar = readconf_func.readconf()

    users_stats = []
    # Establish a connection

    if DBTYPE == 'LBPROXY':
        lbhost = confvar['LBPROXY_DB_HOST']
        dbuser = confvar['LBPROXY_DB_USER']
        dbname = confvar['LBPROXY_DB_NAME']
    elif DBTYPE == 'LBSERVER':
        lbhost = confvar['LB_DB_HOST']
        dbuser = confvar['LB_DB_USER']
        dbname = confvar['LB_DB_NAME']

    logger.info('Establishing a connection with mysql DB')
    db = MySQLdb.connection(host=lbhost,
                            user=dbuser,
                            db=dbname,
                            passwd=confvar['SERVER_MYSQL_PASSWORD'][1:-1])

    ################ MAIN DATA CONTAINER LIST INITIALIZATION ######
    wmsdata_list = []

    ###############################################################

    def put_into_wmsdata(wmsdata_list, wmshostname, userdn, fieldlist,
                         valuelist):
        wmsFOUND = False
        for wmsdata in wmsdata_list:
            if wmsdata.host == wmshostname:
                wmsFOUND = True
                try:
                    wmsdata.add_user(userdn)
                except wmsdata_class.UserPresent:
                    # the user is already present in wmsdata for this host
                    pass
                # set the fields whether or not the user was already known
                for field in fieldlist:
                    wmsdata[userdn][field] = valuelist[fieldlist.index(field)]
        if not wmsFOUND:
            wmsdata = wmsdata_class.wmsdata(wmshostname)
            wmsdata.add_user(userdn)
            for field in fieldlist:
                wmsdata[userdn][field] = valuelist[fieldlist.index(field)]
            wmsdata_list.append(wmsdata)

    # Run a MySQL query to find the number of single jobs submitted in a given time interval PER USER and PER WMS
    logger.info(
        'Running a MySQL query to find the number of single jobs submitted in a given time interval PER USER and PER WMS'
    )
    querystr = "select users.cert_subj,host,COUNT(DISTINCT(events.jobid)) from events,short_fields inner join users on events.userid=users.userid where events.event=short_fields.event and code='17' and time_stamp>'" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' and events.jobid=short_fields.jobid and name='NSUBJOBS' and value='0' group by users.cert_subj,host;"
    logger.info('Query is : ' + querystr)
    db.query(querystr)
    r = db.store_result()
    # Iterate through the result set
    WMP_in = 0
    if r:
        for i in range(1, r.num_rows() + 1):
            row = r.fetch_row()
            #  logger.debug('FOUND ROW: ' + row )
            if row:
                dn = row[0][0]
                rowhost = row[0][1]
                rowWMP_in = row[0][2]

                put_into_wmsdata(wmsdata_list, rowhost, dn, ['WMP_in'],
                                 [rowWMP_in])

######################################################################################################################
### We decided not to take the avg and the std of nodes per collection anymore because they are not summable over more LBs.
### What we do is to take PER USER the total number of jobs in collections, and the min and max of nodes per collection.
### These are summable, and the avg calculation can be done on the collector side.
### Anyway we sum over users on the sensor side and we also return the total number of jobs per collection, min and max of nodes PER WMS.
### Summing over wmsdata will be done at the end of this function or in the wrapper if the wmsdata_list is returned.
##########################################################################################################################
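### Worked example of why count/sum/min/max are kept instead of avg/std: averages
### from two LBs, say avg=50 over 2 collections and avg=20 over 3, cannot simply
### be added, while the counts and sums can, and the collector can still derive
### the global average: (2*50 + 3*20) / (2 + 3) = 32.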

# Run a query to find per user and per host the number of collections, the total number of nodes in collections, and the min and max of nodes per collection

    logger.info(
        'Running a query to find per user and per host the number of collections, the total number of nodes in collections, and the min and max of nodes per collection'
    )
    querystr = "select users.cert_subj, host, COUNT(value), sum(value), min(value),max(value) from events,short_fields inner join users on events.userid=users.userid where events.event=short_fields.event and code='17' and time_stamp>'" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' and events.jobid=short_fields.jobid and name='NSUBJOBS' and short_fields.event='0' and value>'0' group by users.cert_subj,host"
    logger.info('Query is : ' + querystr)
    db.query(querystr)
    r = db.store_result()
    # Iterate through the result set
    if r:
        for i in range(1, r.num_rows() + 1):
            row = r.fetch_row()
            if row:
                dn = row[0][0]
                rowhost = row[0][1]
                rowWMP_in_col = row[0][2]
                rowWMP_in_col_nodes = row[0][3]
                rowWMP_in_col_min_nodes = row[0][4]
                rowWMP_in_col_max_nodes = row[0][5]

                put_into_wmsdata(wmsdata_list, rowhost, dn, [
                    'WMP_in_col', 'WMP_in_col_nodes', 'WMP_in_col_min_nodes',
                    'WMP_in_col_max_nodes'
                ], [
                    rowWMP_in_col, rowWMP_in_col_nodes,
                    rowWMP_in_col_min_nodes, rowWMP_in_col_max_nodes
                ])

#  Run a query to find PER USER and PER WMS the number of jobs enqueued to WM from WMP in a given time interval
    logger.info(
        "Run a query to find PER USER and PER WMS the number of jobs enqueued to WM from WMP in a given time interval"
    )
    querystr = "select  users.cert_subj, host, COUNT(events.jobid) from events,short_fields inner join users on events.userid=users.userid where events.event=short_fields.event and code='4' and time_stamp >'" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' and events.jobid=short_fields.jobid and events.event=short_fields.event and  prog='NetworkServer' and name='RESULT' and value='OK' group by users.cert_subj,host;"
    logger.info('Query is : ' + querystr)
    db.query(querystr)
    r = db.store_result()
    if r:
        for i in range(1, r.num_rows() + 1):
            row = r.fetch_row()
            if row:
                dn = row[0][0]
                rowhost = row[0][1]
                rowWM_in = row[0][2]

                put_into_wmsdata(wmsdata_list, rowhost, dn, ['WM_in'],
                                 [rowWM_in])

    # Run a MySQL query to find the number of both collection and single jobs enqueued to WM in a given time interval from the LogMonitor (i.e. Resubmitted)
    logger.info(
        'Run a MySQL query to find the number of both collection and single jobs enqueued to WM in a given time interval from the LogMonitor (i.e. Resubmitted) PER USER and PER WMS'
    )
    querystr = "select users.cert_subj,host,COUNT(DISTINCT(events.jobid)) from events,short_fields inner join users on events.userid=users.userid where code='4' and time_stamp >'" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' and events.jobid=short_fields.jobid and events.event=short_fields.event and name='RESULT' and value='OK' and prog='LogMonitor' group by users.cert_subj, host;"
    logger.info('Query is : ' + querystr)
    db.query(querystr)
    r = db.store_result()
    # Iterate through the result set
    if r:
        for i in range(1, r.num_rows() + 1):
            row = r.fetch_row()
            if row:
                usernew = row[0][0]
                index = row[0][0].find('/CN=proxy/CN=proxy')
                if index != -1:
                    usernew = row[0][0][0:index]
                dn = usernew
                rowhost = row[0][1]
                rowWM_in_res = row[0][2]

                put_into_wmsdata(wmsdata_list, rowhost, dn, ['WM_in_res'],
                                 [rowWM_in_res])

    # Run a MySQL query to find the number of single jobs enqueued to the Job Controller from WM in a given time interval PER WMS and PER USER
    logger.info(
        'Run a MySQL query to find the number of single jobs enqueued to the Job Controller from WM in a given time interval PER USER and PER WMS'
    )
    querystr = "select users.cert_subj,host,COUNT(DISTINCT(events.jobid)) from events,short_fields inner join users on events.userid=users.userid where code='4' and time_stamp >'" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' and events.jobid=short_fields.jobid and events.event=short_fields.event and name='RESULT' and value='OK' and prog='WorkloadManager' group by users.cert_subj,host;"
    logger.info('Query is : ' + querystr)
    db.query(querystr)
    r = db.store_result()
    # Iterate through the result set
    if r:
        for i in range(1, r.num_rows() + 1):
            row = r.fetch_row()
            if row:
                usernew = row[0][0]
                index = row[0][0].find('/CN=proxy/CN=proxy')
                if index != -1:
                    usernew = row[0][0][0:index]
                dn = usernew
                rowhost = row[0][1]
                rowJC_in = row[0][2]

                put_into_wmsdata(wmsdata_list, rowhost, dn, ['JC_in'],
                                 [rowJC_in])

    # Run a MySQL query to find the number of single jobs enqueued to Condor from the Job Controller in a given time interval PER USER and PER WMS
    logger.info(
        'Run a MySQL query to find the number of single jobs enqueued to Condor from the Job Controller in a given time interval PER USER and PER WMS'
    )
    querystr = "select users.cert_subj,host,COUNT(DISTINCT(events.jobid)) from events,short_fields inner join users on events.userid=users.userid where code='1' and time_stamp >'" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' and events.jobid=short_fields.jobid and events.event=short_fields.event and name='RESULT' and value='OK' and prog='JobController' group by users.cert_subj,host;"
    logger.info('Query is : ' + querystr)
    db.query(querystr)
    r = db.store_result()
    # Iterate through the result set
    if r:
        for i in range(1, r.num_rows() + 1):
            row = r.fetch_row()
            if row:
                usernew = row[0][0]
                index = row[0][0].find('/CN=proxy/CN=proxy')
                if index != -1:
                    usernew = row[0][0][0:index]
                dn = usernew
                rowhost = row[0][1]
                rowJC_out = row[0][2]

                put_into_wmsdata(wmsdata_list, rowhost, dn, ['JC_out'],
                                 [rowJC_out])

# Run a MySQL query to find the number of jobs done in a given time interval PER USER and PER WMS
    logger.info(
        'Run a MySQL query to find the number of single jobs done successfully in a given time interval PER USER and PER WMS'
    )
    querystr = "select users.cert_subj,host,COUNT(DISTINCT(events.jobid)) from events,short_fields inner join users on events.userid=users.userid where events.jobid=short_fields.jobid and code='10' and time_stamp >'" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' and prog='LogMonitor' and name='REASON' and (value='Job terminated successfully' or value='Job Terminated Successfully') group by users.cert_subj,host;"
    logger.info('Query is : ' + querystr)
    db.query(querystr)
    r = db.store_result()
    # Iterate through the result set
    if r:
        for i in range(1, r.num_rows() + 1):
            row = r.fetch_row()
            if row:
                usernew = row[0][0]
                index = row[0][0].find('/CN=proxy/CN=proxy')
                if index != -1:
                    usernew = row[0][0][0:index]
                dn = usernew
                rowhost = row[0][1]
                rowJOB_DONE = row[0][2]

                put_into_wmsdata(wmsdata_list, rowhost, dn, ['JOB_DONE'],
                                 [rowJOB_DONE])

# Run a MySQL query to find the number of jobs aborted in a given time interval PER USER and PER WMS
    logger.info(
        'Run a MySQL query to find the number of single jobs aborted in a given time interval PER USER and PER WMS'
    )
    querystr = "select users.cert_subj,host,COUNT(DISTINCT(events.jobid)) from events inner join users on events.userid=users.userid where code='12' and time_stamp >'" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' group by users.cert_subj,host;"

    logger.info('Query is : ' + querystr)
    db.query(querystr)
    r = db.store_result()
    # Iterate through the result set
    if r:
        for i in range(1, r.num_rows() + 1):
            row = r.fetch_row()
            if row:
                usernew = row[0][0]
                index = row[0][0].find('/CN=proxy/CN=proxy')
                if index != -1:
                    usernew = row[0][0][0:index]
                dn = usernew
                rowhost = row[0][1]
                rowJOB_ABORTED = row[0][2]

                put_into_wmsdata(wmsdata_list, rowhost, dn, ['JOB_ABORTED'],
                                 [rowJOB_ABORTED])

# Run a MySQL query to find the DEST_CE of jobs in a given time interval PER WMS
    logger.info(
        'Run a MySQL query to find the DEST_CE of jobs in a given time interval PER WMS'
    )

    ##### old CE query - this double-counts CEs for jobs landed onto a CREAM CE

    #querystr="select value, host, COUNT(value) from (select DISTINCT(short_fields.event),events.jobid, short_fields.value, host from events,short_fields where events.jobid=short_fields.jobid  and time_stamp >'" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' and prog='WorkloadManager' and name='DEST_HOST' and value!='localhost' and value!='unavailable' and code='15') as temp group by value, host;"
    ##################################################

    ##### New query that avoids double-counting CEs for jobs landed onto a CREAM CE
    querystr = "select value,host,  count(value) from (select distinct(short_fields.jobid), value, host from short_fields inner join events where events.code='15' and events.prog = 'WorkloadManager' and name='DEST_HOST' and time_stamp > '" + STARTDATE + "' and time_stamp <='" + ENDDATE + "' and value!='localhost' and value!='unavailable' and events.jobid=short_fields.jobid) as temp group by value, host;"
    ##################################################

    logger.info('Query is : ' + querystr)
    db.query(querystr)
    r = db.store_result()
    # Iterate through the result set
    if r:
        for i in range(1, r.num_rows() + 1):
            row = r.fetch_row()
            if row:
                rowCE = row[0][0]
                rowhost = row[0][1]
                rowCEcount = row[0][2]
                wmsFOUND = False
                for wmsdata in wmsdata_list:
                    if wmsdata.host == rowhost:
                        wmsFOUND = True
                        try:
                            wmsdata.add_ce(rowCE)
                            wmsdata.add_ce_count(rowCE, rowCEcount)
                        except wmsdata_class.CEPresent:
                            # the CE is already known for this host: still accumulate its count
                            wmsdata.add_ce_count(rowCE, rowCEcount)
                if not wmsFOUND:
                    wmsdata = wmsdata_class.wmsdata(rowhost)
                    wmsdata.add_ce(rowCE)
                    wmsdata.add_ce_count(rowCE, rowCEcount)
                    wmsdata_list.append(wmsdata)


# Run a MySQL query to find the LB used to store the jobs in a given time interval
# Available only if DBTYPE = LBPROXY

    if DBTYPE == 'LBPROXY':
        logger.info(
            'Run a MySQL query to find the LB used to store the jobs in a given time interval'
        )
        querystr = "select distinct dg_jobid from jobs inner join events on jobs.jobid=events.jobid where events.code = '17' and time_stamp > '" + STARTDATE + "' and time_stamp < '" + ENDDATE + "';"
        logger.info('Query is : ' + querystr)
        db.query(querystr)
        r = db.store_result()
        # Iterate through the result set
        if r:
            for i in range(1, r.num_rows() + 1):
                row = r.fetch_row()
                if row:
                    rowLB = row[0][0]
                    LBstr = rowLB[rowLB.find('//') + 2:rowLB.find(':9000')]
                    for wmsdata in wmsdata_list:
                        wmsdata.add_lb(LBstr)

    db.close()

    #   filename= confvar['INSTALL_PATH'] +'/sensors//tmp/USERSTATS_' +  lbhost + '_' + wmshost + '.txt'

    #   fileusersstats = open(filename,'w')
    #   fileusersstats.write('START OF FILE\n')
    #   for i in range(0,len(users_stats)):
    #      fileusersstats.write(str(users_stats[i][0]) + '|' + str(users_stats[i][1]) + '|' + str(users_stats[i][2]) + '|' + str(users_stats[i][3]) + '|' + str(users_stats[i][4]) + '|' + str(users_stats[i][5]) + '|' + str(users_stats[i][6]) + '|' + str(users_stats[i][7]) + '|' + str(users_stats[i][8]) + '|\n')

    #   fileusersstats.write('END OF FILE\n')
    #   fileusersstats.close()

    return wmsdata_list
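The '/CN=proxy/CN=proxy' suffix handling repeated in each result loop above could be factored into a small helper; a sketch of that refactoring (the helper name is ours, not WMSMonitor's):

def strip_proxy_suffix(dn):
    # Collapse a delegated-proxy DN onto the base user DN, as done inline above.
    index = dn.find('/CN=proxy/CN=proxy')
    if index != -1:
        return dn[0:index]
    return dn

Each result loop could then call dn = strip_proxy_suffix(row[0][0]) instead of repeating the find/slice.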
Ejemplo n.º 15
0
#! /usr/bin/python
# Main program to call sensor functions
import os, commands, sys, fpformat
sys.path.append('/opt/WMSMonitor/collector/bin/')
import logging
import logpredef
logger = logging.getLogger('wms_balancing_arbiter')
import readconf_func
conf=readconf_func.readconf()
def mail_notification(subject,body):
      # subject must be a complete header line, e.g. "Subject: ...\n",
      # since it is written verbatim into the sendmail headers
      try:
           #sending mail....
           SENDMAIL = "/usr/sbin/sendmail" # sendmail location
           p = os.popen("%s -t" % SENDMAIL, "w")
           p.write("To: " + conf.get('LOAD_BALANCING_SITE_CONTACT') + "\n")
           p.write(subject)
           p.write("\n") # blank line separating headers from body
           p.write(body)
           sts = p.close()
           # popen().close() returns None on success, the exit status otherwise
           if sts is not None:
              logger.info("Sendmail exit status " + str(sts))
      except Exception,e:
           logger.error("ERROR SENDING MAIL: " + str(e))


def wms_balancing_arbiter():
        '''wms_balancing_arbiter() -> updating wms instances available behind an alias
           depending on the load of the instances according to the load metric provided by
           wms_balancing_metric function 
           Return None if errors are raised during calculation.
        '''
Ejemplo n.º 16
0
def wms_usermapping():

    #Initializing logger
    import logging
    import logpredef_wmslb
    logger = logging.getLogger('wms_usermapping')

    # modules used below (readconf_func is WMSMonitor's own)
    import os, sys, time, socket
    import readconf_func

    confvar = readconf_func.readconf()
    timenow = str(int(time.mktime(time.localtime())))

    hostname = socket.getfqdn()
    logger.info("server hostname is: " + hostname)

    if hostname == '':
      logger.error('Could not determine machine hostname! Exiting...')
      sys.exit(1)
 
    filename = confvar['INSTALL_PATH'] + '/sensors/tmp/USERSMAPPING.txt'
    f = open(filename,'w')
    f.write("START OF MAPPING TABLE\n")
    data = "\"" + time.strftime("%d %b",time.localtime()) + "\""

    #deciding whether to use rotated log or not

    logfile = ''
    maxlog = int(confvar['MAX_ROTATED_LOG'])
    for i in range(maxlog + 1):
       if i == 0:
          if  (os.access(confvar.get('GLITE_LOG_DIR') + '/wmproxy.log',os.F_OK) == True):
             logfile = logfile + confvar.get('GLITE_LOG_DIR') + '/wmproxy.log'
       else:
          fname = confvar.get('GLITE_LOG_DIR') + '/wmproxy.log.' + str(i)
          if  (os.access(fname,os.F_OK) == True):
             std = os.popen("tail -2 " + fname + " | grep " + data)
             if len(std.readlines()):
                logfile = logfile + ' ' + fname

    cmd = "grep " + data + " " + logfile + " |grep -A1 CLIENT > " + confvar['INSTALL_PATH'] + "/sensors/tmp/tmpgrep.txt"
    if (os.system(cmd) == 0): 
       logfile = confvar['INSTALL_PATH'] + "/sensors/tmp/tmpgrep.txt"
       cmd = "cat " + logfile + " | grep CLIENT| sed -e 's/.*DN: //g' -e 's/\/CN=proxy.*//g' |" + "grep -v " + data + " " + "| sort |uniq"
       std = os.popen(cmd)
       stdstr =  std.readlines()
       if ( len(stdstr) > 0 ):
         for line in stdstr:
                ltmp = line.split('/CN=')
                user = ltmp[len(ltmp)-1]
                user = user.split('/')[0]
                cmd =  'grep -A1 "' + user.rstrip() + '" '  + logfile + " |grep Role |tail -1 | sed \'s/.*VOMS.*0 //g\'"
                std = os.popen(cmd)
                stdstr =  std.readlines()
                if ( len(stdstr) > 0 ):
                    istr = stdstr[0]
                    for l in istr.split(' '):
                        if l.find('Role')!= -1:
                           VO = l.split('/')[1]
                           VO_SUB = l[(l.find(VO)+len(VO)):-1]
#                           CAPABILITY = l.split('/')[3].split('=')[1]
#                           print 'USER: '******'\nVO = ' + VO + '\nRole = ' + ROLE + '\nCapability = ' + CAPABILITY 
#                           print line.rstrip(),' ',user.rstrip(), ' ', VO , ' ', VO_SUB
                           f.write(line.rstrip() + ' | ' + VO + ' | ' + VO_SUB + '\n')
                           
                else:
                     logger.error("ERROR: Could not determine User /VO/Role/Capability ! \n")
       else:
         logger.error("No CLIENT DN entries found in " + logfile + "! Exiting...\n")

       f.write("END OF MAPPING TABLE\n")
       f.close()
       cmd = 'rm -f ' + logfile
       os.system(cmd)
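The resulting USERSMAPPING.txt is a plain pipe-separated table between START/END markers, one line per distinct client DN; illustrative content only (DN, VO and FQAN remainder are made up):

START OF MAPPING TABLE
/DC=org/DC=example/CN=Jane Doe | atlas | /Role=production/Capability=NULL
END OF MAPPING TABLE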
Ejemplo n.º 17
0
def wms_usermapping():

    #Initializing logger
    import logging
    import logpredef_wmslb
    logger = logging.getLogger('wms_usermapping')

    # modules used below (readconf_func is WMSMonitor's own)
    import os, sys, time, socket
    import readconf_func

    confvar = readconf_func.readconf()
    timenow = str(int(time.mktime(time.localtime())))

    hostname = socket.getfqdn()
    logger.info("server hostname is: " + hostname)

    if hostname == '':
        logger.error('Could not determine machine hostname! Exiting...')
        sys.exit(1)

    filename = confvar['INSTALL_PATH'] + '/sensors/tmp/USERSMAPPING.txt'
    f = open(filename, 'w')
    f.write("START OF MAPPING TABLE\n")
    data = "\"" + time.strftime("%d %b", time.localtime()) + "\""

    #deciding whether to use rotated log or not

    logfile = ''
    maxlog = int(confvar['MAX_ROTATED_LOG'])
    for i in range(maxlog + 1):
        if i == 0:
            if (os.access(
                    confvar.get('GLITE_LOG_DIR') + '/wmproxy.log',
                    os.F_OK) == True):
                logfile = logfile + confvar.get(
                    'GLITE_LOG_DIR') + '/wmproxy.log'
        else:
            fname = confvar.get('GLITE_LOG_DIR') + '/wmproxy.log.' + str(i)
            if (os.access(fname, os.F_OK) == True):
                std = os.popen("tail -2 " + fname + " | grep " + data)
                if len(std.readlines()):
                    logfile = logfile + ' ' + fname

    cmd = "grep " + data + " " + logfile + " |grep -A1 CLIENT > " + confvar[
        'INSTALL_PATH'] + "/sensors/tmp/tmpgrep.txt"
    if (os.system(cmd) == 0):
        logfile = confvar['INSTALL_PATH'] + "/sensors/tmp/tmpgrep.txt"
        cmd = "cat " + logfile + " | grep CLIENT| sed -e 's/.*DN: //g' -e 's/\/CN=proxy.*//g' |" + "grep -v " + data + " " + "| sort |uniq"
        std = os.popen(cmd)
        stdstr = std.readlines()
        if (len(stdstr) > 0):
            for line in stdstr:
                ltmp = line.split('/CN=')
                user = ltmp[len(ltmp) - 1]
                user = user.split('/')[0]
                cmd = 'grep -A1 "' + user.rstrip(
                ) + '" ' + logfile + " |grep Role |tail -1 | sed \'s/.*VOMS.*0 //g\'"
                std = os.popen(cmd)
                stdstr = std.readlines()
                if (len(stdstr) > 0):
                    istr = stdstr[0]
                    for l in istr.split(' '):
                        if l.find('Role') != -1:
                            VO = l.split('/')[1]
                            VO_SUB = l[(l.find(VO) + len(VO)):-1]
                            #                           CAPABILITY = l.split('/')[3].split('=')[1]
                            #                           print 'USER: '******'\nVO = ' + VO + '\nRole = ' + ROLE + '\nCapability = ' + CAPABILITY
                            #                           print line.rstrip(),' ',user.rstrip(), ' ', VO , ' ', VO_SUB
                            f.write(line.rstrip() + ' | ' + VO + ' | ' +
                                    VO_SUB + '\n')

                else:
                    logger.error(
                        "ERROR: Could not determine User /VO/Role/Capability ! \n"
                    )
        else:
            logger.error("No CLIENT DN entries found in " + logfile + "! Exiting...\n")

        f.write("END OF MAPPING TABLE\n")
        f.close()
        cmd = 'rm -f ' + logfile
        os.system(cmd)