Esempio n. 1
0
 def startvm(self, master):
     runCommand("wget --directory-prefix=/tmp/ https://raw.github.com/fbarreir/CloudVMManager/master/wnconfig_analysis.sh --no-check-certificate")
     f = open("/tmp/wnconfig_analysis.sh",'r')
     s = f.read()
     f.close()
     runCommand("rm -f /tmp/wnconfig_analysis.sh")
     a=runCommand('stratus-run-instance --context="EC2_USER_DATA=' + base64.standard_b64encode(s.replace('$1', master, 1)) + '" --endpoint=$STRATUSLAB_ENDPOINT --username=$STRATUSLAB_USERNAME --password=$STRATUSLAB_PASSWORD --key=$STRATUSLAB_KEY  $IMG')
     return a
Esempio n. 2
0
 def configure_vm(self,vm_ip):
     a="0"
     r=StratusAdaptor()
     while a=="0":
         time.sleep(60)
         a=runCommand("stratus-describe-instance|grep "+vm_ip+"| awk '{print $4}'")
         b=runCommand("stratus-describe-instance|grep "+vm_ip+"| awk '{print $2}'")
         a=str(a[0][0])
         b=str(b[0][:-1])
         if b=="Failed":
             break
     #if a!="0" and b!="Failed":
         #time.sleep(240)
         #StratusAdaptor.execscript(r, vm_ip, master)
         ##optional command for analysis jobs proxy server
         #runCommand("scp -o StrictHostKeyChecking=no -i $STRATUSLAB_PRIVATE_KEY /home/cms001/crab/CMSSW_5_0_1/src/x509up_u15305 root@"+ vm_ip +":/data")
         #runCommand(". ./wnconf.sh")
         #return 0
     if b=="Failed":
         vmid=runCommand("stratus-describe-instance|grep "+vm_ip+"| awk '{print $1}'")
         vmid=str(vmid[0][:-1])
         StratusAdaptor.stopvm(r,vmid)
         return vmid
     return 0
Esempio n. 3
0
 def vmstatus(self,p):
     vm_id=runCommand("stratus-describe-instance|awk "+p)
     return vm_id
Esempio n. 4
0
 def stopvm(self, vmid):
     runCommand("stratus-kill-instance --endpoint=$STRATUSLAB_ENDPOINT --username=$STRATUSLAB_USERNAME --password=$STRATUSLAB_PASSWORD "+vmid)
Esempio n. 5
0
def main(config, logger):
    x, y, z, LOG_DIR,master = getConfig(config,logger)
    #to check for exit code of the executed commands
    a=cstatus("grep /LINUX|awk '{print $2}'")
    if a[1]!=0:
        logger.error('condor_status failed with exitcode ' + str(a[2]))
        return False
    b=cq("grep jobs|awk '{print $1}'")
    if b[1]!=0:
        logger.error("condor_q failed with exitcode " + str(b[2]))
        return False
    d=cq("grep running|awk '{print $5}'")
    if d[1]!=0:
        logger.error("condor_q failed with exitcode " + str(d[2]))
        return False
    e=cq("grep idle|awk '{print $3}'")
    if e[1]!=0:
        logger.error("condor_q failed with exitcode " + str(e[2]))
        return False
    f=cq("grep held|awk '{print $7}'")
    if f[1]!=0:
        logger.error("condor_q failed with exitcode " + str(f[2]))
        return False
    try:
        logger.info('\nTIMESTAMP:'+str(time.asctime( time.localtime(time.time()) ))+'\nNumber of worker nodes : '+a[0]+'\nNumber of jobs submitted :'+b[0]+'\nNumber of jobs running :'+str(d[0])+'\nNumber of idle jobs :'+str(e[0])+'\nNumber of held jobs :'+str(f[0]))
        s=StratusAdaptor()
        t=0
        if a[0]=='' or a[0]=="":
            a=('0',)+a[1:]
        if int(a[0])<int(b[0]) and int(a[0])<=int(y):
            if int(b[0])<=int(y):
                more_wn=int(b[0])-int(a[0])
            else:
                more_wn=int(y)-int(a[0])
            logger.info('\nStart '+str(more_wn)+' more worker node(s)')
            new_list=[]
            for i in range(0,more_wn):
                new=StratusAdaptor.startvm(s, master)
                new=new[0][new[0].index('134.'):new[0].index('Done')-5]
                new_list.append(new)
            for i in new_list:
                st=StratusAdaptor.configure_vm(s,i)
                if st!=0:
                    logger.warning("Machine failed to start....Killing instance "+st)
        elif a[0]>b[0]:
            #to shutdown nodes that are idle for too long
            q=runCommand( "condor_status -verbose|grep -E 'Machine = |EnteredCurrentActivity|Activity'|grep -v 'ClientMachine ='|awk '{ print $3 }'")
            q= q[0].split('\n')
            q.remove('')
            p=0
            for i in q:
                if int(time.time())-int(q[p+2])<int(z) and q[p+1]=='"Idle"':
                    t+=1
                if int(time.time())-int(q[p+2])>int(z) and q[p+1]=='"Idle"':
                    t-=1
                    mip=q[p][q[p].index('-')+1:q[p].index('.')]
                    vm_id=StratusAdaptor.vmstatus(s,'{print $1}')
                    vm_ip=StratusAdaptor.vmstatus(s,'{print $6}')
                    vm_id=vm_id[0].split('\n')
                    vm_id=vm_id[1:]
                    vm_id.remove('')
                    vm_ip=vm_ip[0].split('\n')
                    vm_ip.remove('')
                    vm_ip.remove('ip')
                    maddr='134.158.75.'+mip
                    logger.info('Shutting down worker node :'+ q[p]+':'+maddr+' because it is unused for a long time')
                    for i in vm_ip:
                        if i==maddr:
                            g=vm_ip.index(i)
                    StratusAdaptor.killvm(s,vm_id[g])
                if p+3<len(q):
                    p+=3
                else:
                    break
        #to check if there are idle jobs and also idle machines
        if int(e[0])>0 and t>0:
            logger.warning('\nJobs idle...not yet assigned to idle machines') 

        #to check if there are too many jobs
        if int(x)<int(d[0]):
            logger.error('\nToo many jobs running..Please remove some jobs')
        #to check if there are no more worker nodes
        if int(y)<int(a[0]):
            logger.error('\nMaximum worker nodes limit is reached. Cannot start any more worker nodes!!')
    except IOError, ioer:
        print ioerr
        logger.error('\nI/O Error occurred')
        return False
Esempio n. 6
0
def cq(z):
    b=runCommand("condor_q|"+z)
    return b
Esempio n. 7
0
def cstatus(z):
    a=runCommand("condor_status|"+z)
    return a
Esempio n. 8
0
 def startvm(self):
     a=runCommand("stratus-run-instance --endpoint=$STRATUSLAB_ENDPOINT --username=$STRATUSLAB_USERNAME --password=$STRATUSLAB_PASSWORD --key=$STRATUSLAB_KEY  $IMG")
     return a