Example #1
def injector(url, options, specific):
    mlock = moduleLock()
    if mlock(): return

    use_mcm = True
    up = componentInfo(soft=['mcm', 'wtc'])
    if not up.check(): return
    use_mcm = up.status['mcm']

    UC = unifiedConfiguration()

    transform_keywords = UC.get('convert_to_stepchain')

    workflows = getWorkflows(url, status=options.wmstatus, user=options.user)
    for user in UC.get("user_rereco"):
        workflows.extend(
            getWorkflows(url,
                         status=options.wmstatus,
                         user=user,
                         rtype="ReReco"))
    for user in (options.user_relval.split(',')
                 if options.user_relval else UC.get("user_relval")):
        workflows.extend(
            getWorkflows(url,
                         status=options.wmstatus,
                         user=user,
                         rtype="TaskChain"))
    for user in (options.user_storeresults.split(',') if
                 options.user_storeresults else UC.get("user_storeresults")):
        workflows.extend(
            getWorkflows(url,
                         status=options.wmstatus,
                         user=user,
                         rtype="StoreResults"))

    print len(workflows), "in line"
    cannot_inject = set()
    to_convert = set()
    status_cache = defaultdict(str)

    ## browse assignment-approved requests, compare with the ones we already track, and insert the difference
    for wf in workflows:
        if specific and not specific in wf: continue

        exists = session.query(Workflow).filter(Workflow.name == wf).first()
        if not exists:
            wfi = workflowInfo(url, wf)
            ## check first that there isn't a related workflow already tracked with a valid status
            can_add = True
            ## first try at finding a match
            familly = session.query(Workflow).filter(
                Workflow.name.contains(wfi.request['PrepID'])).all()
            if not familly:
                pids = wfi.getPrepIDs()
                req_familly = []
                for pid in pids:
                    req_familly.extend(getWorkflowById(url, pid, details=True))

                familly = []
                print len(req_familly), "members"
                for req_member in req_familly:
                    #print "member",req_member['RequestName']
                    owfi = workflowInfo(url,
                                        req_member['RequestName'],
                                        request=req_member)
                    other_pids = owfi.getPrepIDs()
                    if set(pids) == set(other_pids):
                        ## this is a real match
                        familly.extend(
                            session.query(Workflow).filter(
                                Workflow.name ==
                                req_member['RequestName']).all())

            for lwfo in familly:
                if lwfo:
                    ## we have it already
                    if not lwfo.status in [
                            'forget', 'trouble', 'forget-unlock',
                            'forget-out-unlock'
                    ]:
                        wfi.sendLog(
                            'injector', "Should not put %s because of %s %s" %
                            (wf, lwfo.name, lwfo.status))
                        sendLog('injector',
                                "Should not put %s because of %s %s" %
                                (wf, lwfo.name, lwfo.status),
                                level='critical')
                        print "Should not put", wf, "because of", lwfo.name, lwfo.status
                        cannot_inject.add(wf)
                        can_add = False
            ## add a check on validity of input datasets
            _, prim, par, sec = wfi.getIO()
            for d in list(prim) + list(par) + list(sec):
                if not d in status_cache:
                    status_cache[d] = getDatasetStatus(d)
                if status_cache[d] != 'VALID':
                    wfi.sendLog(
                        'injector', "One of the input is not VALID. %s : %s" %
                        (d, status_cache[d]))
                    sendLog('injector',
                            "One of the input of %s is not VALID. %s : %s" %
                            (wf, d, status_cache[d]),
                            level='critical')
                    can_add = False
                ## check for any file in phedex, to verify existence
                _, ph_files, _, _ = getDatasetFiles(url, d)
                if not ph_files and not ('StoreResults'
                                         == wfi.request.setdefault(
                                             'RequestType', None)):
                    wfi.sendLog(
                        'injector',
                        "One of the input has no file in phedex: %s" % d)
                    sendLog('injector',
                            "One of the input has no file in phedex: %s" % d,
                            level='critical')
                    can_add = False

            ### ban some workflow that you don't like anymore
            #outputs = wfi.request['OutputDatasets']

            if not can_add: continue

            ## temporary hack to transform specific taskchain into stepchains
            #good_for_stepchain = wfi.isGoodToConvertToStepChain( keywords = transform_keywords)
            good_for_stepchain = wfi.isGoodToConvertToStepChain(keywords=None)

            ## match keywords and technical constraints
            #if (not options.no_convert) and good_for_stepchain and not wfi.isRelval():
            #    to_convert.add( wf )
            #    wfi.sendLog('injector','Transforming %s TaskChain into StepChain'%wf)
            #    #sendEmail('convertion to stepchain','Transforming %s TaskChain into StepChain'%wf)

            wfi.sendLog('injector', "considering %s" % wf)

            new_wf = Workflow(name=wf,
                              status=options.setstatus,
                              wm_status=options.wmstatus)
            session.add(new_wf)
            session.commit()
            time.sleep(0.5)
        else:
            #print "already have",wf
            pass

    if cannot_inject:
        #sendEmail('workflow duplicates','These workflow cannot be added in because of duplicates \n\n %s'%( '\n'.join(cannot_inject)))
        sendLog(
            'injector',
            'These workflow cannot be added in because of duplicates \n\n %s' %
            ('\n'.join(cannot_inject)),
            level='warning')

    for wf in to_convert:
        os.system(
            './Unified/rejector.py --clone --to_step --comments \"Transform to StepChain\" %s'
            % wf)

    ## passing a round of invalidation of what needs to be invalidated
    if use_mcm and (options.invalidate or True):
        invalidator(url)

    no_replacement = set()

    #print "getting all transfers"
    #all_transfers=session.query(Transfer).all()
    #print "go!"

    ## pick up replacements
    for wf in session.query(Workflow).filter(
            Workflow.status == 'trouble').all():
        print wf.name
        if specific and not specific in wf.name: continue
        print wf.name
        wfi = workflowInfo(url, wf.name)
        wl = wfi.request  #getWorkLoad(url, wf.name)
        familly = getWorkflowById(url, wl['PrepID'])
        true_familly = []
        for member in familly:
            if member == wf.name: continue
            fwl = getWorkLoad(url, member)
            if options.replace:
                if member != options.replace: continue
            else:
                if fwl['RequestDate'] < wl['RequestDate']: continue
                if fwl['RequestType'] == 'Resubmission': continue
                if fwl['RequestStatus'] in ['None', None, 'new']: continue
                if fwl['RequestStatus'] in [
                        'rejected', 'rejected-archived', 'aborted',
                        'aborted-archived'
                ]:
                    continue
            true_familly.append(fwl)

        if len(true_familly) == 0:
            #sendLog('injector','%s had no replacement'%wf.name, level='critical')
            if wfi.isRelval():
                #wfi.sendLog('injector','the workflow was found in trouble with no replacement. As a relval, there is no clean way to handle this.')
                wfi.sendLog(
                    'injector',
                    'the workflow was found in trouble with no replacement. As a relval, there is no clean way to handle this. Setting forget'
                )
                wf.status = 'forget'
                session.commit()
            else:
                wfi.sendLog(
                    'injector',
                    'the workflow was found in trouble with no replacement')
                no_replacement.add(wf.name)
            continue
        else:
            wfi.sendLog(
                'injector',
                'the workflow was found in trouble and has a replacement')

        print wf.name, "has", len(familly), "familly members"
        print wf.name, "has", len(true_familly), "true familly members"

        ##we cannot have more than one of them !!! pick the last one
        if len(true_familly) > 1:
            #sendEmail('multiple wf','please take a look at injector for %s'%wf.name)
            sendLog('injector',
                    'Multiple wf in line, will take the last one for %s \n%s' %
                    (wf.name, ', '.join(fwl['RequestName']
                                        for fwl in true_familly)),
                    level='critical')

        for fwl in true_familly[-1:]:
            member = fwl['RequestName']
            new_wf = session.query(Workflow).filter(
                Workflow.name == member).first()
            if not new_wf:
                sendLog('injector',
                        "putting %s as replacement of %s" % (member, wf.name))
                status = 'away'
                if fwl['RequestStatus'] in ['assignment-approved']:
                    status = 'considered'
                new_wf = Workflow(name=member,
                                  status=status,
                                  wm_status=fwl['RequestStatus'])
                wf.status = 'forget'
                session.add(new_wf)
            else:
                if new_wf.status == 'forget': continue
                sendLog(
                    'injector',
                    "getting %s as replacement of %s" % (new_wf.name, wf.name))
                wf.status = 'forget'

            for tr in session.query(TransferImp).filter(
                    TransferImp.workflow_id == wf.id).all():
                ## get all transfer working for the old workflow
                existing = session.query(TransferImp).filter(
                    TransferImp.phedexid == tr.phedexid).filter(
                        TransferImp.workflow_id == new_wf.id).all()
                tr.active = False  ## disable the old one
                if not existing:
                    ## create the transfer object for the new dependency
                    tri = TransferImp(phedexid=tr.phedexid, workflow=new_wf)
                    session.add(tri)
                session.commit()

        ## don't do that automatically
        #wf.status = 'forget'
        session.commit()
    if no_replacement:
        #sendEmail('workflow with no replacement','%s \n are dangling there'%( '\n'.join(no_replacement)))
        sendLog('injector',
                'workflow with no replacement\n%s \n are dangling there' %
                ('\n'.join(no_replacement)),
                level='critical')
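
The injector above reads a handful of attributes off its options argument. As a minimal sketch (not part of the original Unified source), the driver below shows how it could be wired to a command line; the option names simply mirror the attributes accessed in the function body (wmstatus, user, setstatus, user_relval, user_storeresults, invalidate, no_convert, replace) and the cmsweb URL is a placeholder.

# hypothetical driver for injector(); option names mirror the options.* accesses above
import optparse

if __name__ == '__main__':
    parser = optparse.OptionParser()
    parser.add_option('--wmstatus', default='assignment-approved', help='ReqMgr status to fetch')
    parser.add_option('--user', default=None, help='ReqMgr requestor to fetch for')
    parser.add_option('--setstatus', default='considered', help='initial Unified status for new workflows')
    parser.add_option('--user_relval', default=None, help='comma-separated relval requestors')
    parser.add_option('--user_storeresults', default=None, help='comma-separated StoreResults requestors')
    parser.add_option('--invalidate', action='store_true', default=False)
    parser.add_option('--no_convert', action='store_true', default=False)
    parser.add_option('--replace', default=None, help='take only this workflow as replacement')
    (options, args) = parser.parse_args()
    specific = args[0] if args else None
    injector('cmsweb.cern.ch', options, specific)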
Example #2
            for usor in usors:
                d =time.mktime(time.strptime("-".join(map(str,usor['RequestDate'])), "%Y-%m-%d-%H-%M-%S"))
                secondary_timeout[dataset] = max(secondary_timeout[dataset],d)

        if secondary_timeout[dataset]: ## different than zero
            delay_days = 30
            delay = delay_days*24*60*60 # 30 days     
            if (now-secondary_timeout[dataset])>delay:
                print "unlocking secondary input after",delay_days,"days"
                unlock = True


        tier = dataset.split('/')[-1]
        creators = getWorkflowByOutput( url, dataset , details=True)
        if not creators and not tier == 'RAW':
            ds_status = getDatasetStatus( dataset )
            if not '-v0/' in dataset and ds_status!=None:
                sendEmail('failing get by output','%s has not been produced by anything?'%dataset)
                newly_locking.add(dataset)
                continue
            else:
                # does not matter, cannot be an OK dataset
                unlock = True
                bad_ds = True
        creators_status = [r['RequestStatus'] for r in creators]
        print "Statuses of workflow that made the dataset",dataset,"are",creators_status
        if all([status in ['failed','aborted','rejected','aborted-archived','rejected-archived'] for status in creators_status]):
            ## crap 
            print "\tunlocking",dataset,"for bad workflow statuses"
            unlock = True
            bad_ds = True
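
For reference, a self-contained sketch of the RequestDate arithmetic used above: ReqMgr-style request dates come as a list of six integers, which the snippet joins into a dash-separated string, parses with strptime, and converts to epoch seconds so the 30-day comparison is plain subtraction. The sample date is invented for illustration.

# standalone illustration of the timeout check above (sample values are made up)
import time

request_date = [2018, 5, 3, 12, 0, 0]                 # ReqMgr-style [Y, M, D, h, m, s]
epoch = time.mktime(time.strptime("-".join(map(str, request_date)),
                                  "%Y-%m-%d-%H-%M-%S"))
now = time.time()
delay_days = 30
delay = delay_days * 24 * 60 * 60                     # 30 days in seconds
old_enough = (now - epoch) > delay
print "older than", delay_days, "days:", old_enough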
Example #3
def outcleanor(url, options):

    if options.approve:
        for user in ['*Vlimant']:#,'*Cremonesi']:
            deletes = listDelete( url , user = user)
            for (site,who,tid) in deletes:
                if 'MSS' in site: continue### ever
                print site,who,tid
                print "approving deletion"
                print approveSubscription(url, tid, nodes = [site], comments = 'Production cleaning by data ops')
        return

    

    sites_and_datasets = defaultdict(list)
    our_copies = defaultdict(list)
    wf_cleaned = {}
    
    wfs = []
    for fetch in options.fetch.split(','):
        wfs.extend(session.query(Workflow).filter(Workflow.status==fetch).all())

    random.shuffle( wfs )
    last_answer = None
    for wfo in wfs :
        if options.number and len(wf_cleaned)>= options.number:
            print "Reached",options.number,"cleaned"
            break
        print '-'*100
        wfi = workflowInfo(url, wfo.name)
        goes = {} # boolean per output
        for dataset in wfi.request['OutputDatasets']:
            goes[dataset] = False
            keep_one_out = True
            status = getDatasetStatus( dataset )
            print "\n\tLooking at",dataset,status,"\n"
            vetoes = None
            if status == 'INVALID':
                vetoes = ['Export','Buffer'] ## can take themselves out
                keep_one_out = False # just wipe clean

            elif status == None:
                print dataset,"actually does not exist. skip"
                goes[dataset] = True
                continue

            elif status in ['PRODUCTION','VALID'] and wfo.status in ['forget','trouble']:
                print dataset,"should probably be invalidated. (",wfo.status,") skip"
                keep_one_out = False # just wipe clean
                continue ## you are not sure. just skip it for the time being

            elif status == 'PRODUCTION' and wfo.status in ['clean']:
                print dataset,"should probably be set valid .skip"
                continue ## you are not sure. just skip it for the time being

            if status == 'VALID' and dataset.startswith('/MinBias'):
                print "This is a /MinBias. skip"
                continue

            if '/DQM' in dataset:
                keep_one_out = False

            total_size = getDatasetSize( dataset )
            
            our_presence = getDatasetPresence(url, dataset, complete=None, group="DataOps", vetoes=vetoes)
            also_our_presence = getDatasetPresence(url, dataset, complete=None, group="", vetoes=vetoes)
            
            ## merge in one unique dict
            for site in also_our_presence:
                if site in our_presence:
                    there,frac = our_presence[site]
                    other,ofrac = also_our_presence[site]
                    our_presence[site] = (max(there,other),max(frac,ofrac))
                else:
                    our_presence[site] = also_our_presence[site]
                
            if our_presence: print our_presence

            ## analysis ops copies need to be taken into account
            anaops_presence = getDatasetPresence(url, dataset, complete=None, group="AnalysisOps")
            own_by_anaops = anaops_presence.keys()
            
            ## all our copies
            to_be_cleaned = our_presence.keys()
            if not len(to_be_cleaned):
                print "nowhere to be found of ours,",len(own_by_anaops),"in analysi ops pool"
                goes[dataset] = True
                continue

            print "Where we own bits of dataset"
            print to_be_cleaned
     

            if len(own_by_anaops):
                ## remove site with the anaops copies
                to_be_cleaned = list(set(to_be_cleaned) - set(own_by_anaops))
                keep_one_out = False ## in that case, just remove our copies
                print "Own by anaops (therefore not keep a copy of ours)"
                print own_by_anaops
            else:
                ## we should not be looking at anything that was not passed to DDM, otherwise we'll be cutting the grass under our feet
                using_the_same = getWorkflowByInput(url, dataset, details=True)
                conflict = False
                for other in using_the_same:
                    if other['RequestName'] == wfo.name: continue
                    if other['RequestType'] == 'Resubmission': continue
                    if not other['RequestStatus'] in ['announced','normal-archived','aborted','rejected','aborted-archived','rejected-archived','closed-out','None',None]:
                        print other['RequestName'],'is in status',other['RequestStatus'],'preventing from cleaning',dataset
                        conflict=True
                        break
                if conflict:
                    continue

                ## not being used. a bit less dangerous to clean-out
                ## keep one full copy out there
                full_copies = [site for (site,(there,fract)) in our_presence.items() if there]
                if keep_one_out:
                    if not len(full_copies):
                        print "we do not own a full copy of",dataset,status,wfo.status,".skip"
                        continue
                    stay_there = random.choice( full_copies ) #at a place own by ops
                    print "Where we keep a full copy", stay_there
                    to_be_cleaned.remove( stay_there )
                    our_copies[stay_there].append( dataset )
                else:
                    print "We do not want to keep a copy of ",dataset,status,wfo.status

            if len(to_be_cleaned):
                print "Where we can clean"
                print to_be_cleaned
                for site in to_be_cleaned:
                    sites_and_datasets[site].append( (dataset, total_size*our_presence[site][1]/100., status) )
                goes[dataset] = True
            else:
                print "no cleaning to be done"
                goes[dataset] = True

        print wfo.name,"scrutinized"
        if all(goes.values()):
            print "\t",wfo.name,"can toggle -out"
        ## ask interactively unless running in auto mode; keep the answer in
        ## last_answer so the checks below (and after the loop) see it
        if not options.auto:
            last_answer = raw_input('go on ?')
        if options.auto or last_answer in ['y','']:
            if all(goes.values()):
                wfo.status = wfo.status+'-out'
                wf_cleaned[wfo.name] = wfo.status
            continue
        elif last_answer in ['q','n']:
            break
        else:
            return

    if options.auto:
        pass
    elif last_answer in ['q']:
        return

    print "Potential cleanups"
    for (site,items) in sites_and_datasets.items():
        cleanup = sum([size for (_,size,_) in items])
        print "\n\t potential cleanup of","%8.4f"%cleanup,"GB at ",site
        print "\n".join([ds+" "+st for ds,_,st in items])
        datasets = [ ds for ds,_,st in items]

    print "Copies and bits we are going to delete"
    print json.dumps( sites_and_datasets, indent=2)

    print "Copies we are keeping"
    print json.dumps( our_copies, indent=2 )     

    print "Workflows cleaned for output"
    print json.dumps( wf_cleaned, indent=2 )
    stamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
    open('outcleaning_%s.json'%stamp,'w').write( json.dumps( sites_and_datasets, indent=2))
    open('keepcopies_%s.json'%stamp,'w').write( json.dumps( our_copies, indent=2))
    open('wfcleanout_%s.json'%stamp,'w').write( json.dumps( wf_cleaned, indent=2))


    if (not options.test) and (options.auto or raw_input("Satisfied ? (y will trigger status change and deletion requests)") in ['y']):
        for (site,items) in sites_and_datasets.items():
            datasets = [ ds for ds,_,st in items]
            print "making deletion to",site
            result = makeDeleteRequest(url, site, datasets, "Cleanup output after production. DataOps will take care of approving it.")
            print result
            ## approve it right away ?
            if 'MSS' in site: continue
            if 'Export' in site: continue
            if 'Buffer' in site: continue
            for did in [item['id'] for item in result['phedex']['request_created']]:
                print "auto-approve disabled, but ready"
                #approveSubscription(url, did, nodes = [site], comments = 'Auto-approving production cleaning deletion')
                pass
        session.commit()
    else:
        print "Not making the deletion and changing statuses"
Example #4
def injector(url, options, specific):
    mlock = moduleLock()
    if mlock(): return

    use_mcm = True
    up = componentInfo(soft=['mcm','wtc','jira'] )
    if not up.check(): return
    use_mcm = up.status['mcm']

    UC = unifiedConfiguration()

    transform_keywords = UC.get('convert_to_stepchain')

    workflows = getWorkflows(url, status=options.wmstatus, user=options.user)
    for user in UC.get("user_rereco"):
        workflows.extend( getWorkflows(url, status=options.wmstatus, user=user, rtype="ReReco")) 
    for user in (options.user_relval.split(',') if options.user_relval else UC.get("user_relval")) :
        workflows.extend( getWorkflows(url, status=options.wmstatus, user=user, rtype="TaskChain")) 
    for user in (options.user_storeresults.split(',') if options.user_storeresults else UC.get("user_storeresults")) :
        workflows.extend( getWorkflows(url, status=options.wmstatus, user=user, rtype="StoreResults"))

    print len(workflows),"in line"
    cannot_inject = set()
    to_convert = set()
    status_cache = defaultdict(str)

    ## browse assignment-approved requests, compare with the ones we already track, and insert the difference
    for wf in workflows:
        if specific and not specific in wf: continue

        exists = session.query(Workflow).filter(Workflow.name == wf ).first()
        if not exists:
            wfi = workflowInfo(url, wf)
            ## check first that there isn't a related workflow already tracked with a valid status
            can_add = True
            ## first try at finding a match
            familly = session.query(Workflow).filter(Workflow.name.contains(wfi.request['PrepID'])).all()
            if not familly:
                pids = wfi.getPrepIDs()
                req_familly = []
                for pid in pids:
                    req_familly.extend( getWorkflowById( url, pid, details=True) )
                    
                familly = []
                print len(req_familly),"members"
                for req_member in req_familly:
                    #print "member",req_member['RequestName']
                    owfi = workflowInfo(url, req_member['RequestName'], request=req_member)
                    other_pids = owfi.getPrepIDs()
                    if set(pids) == set(other_pids):
                        ## this is a real match
                        familly.extend( session.query(Workflow).filter(Workflow.name == req_member['RequestName']).all() )

            for lwfo in familly:
                if lwfo:
                    ## we have it already
                    if not lwfo.status in ['forget','trouble','forget-unlock','forget-out-unlock']:
                        wfi.sendLog('injector',"Should not put %s because of %s %s"%( wf, lwfo.name,lwfo.status ))
                        sendLog('injector',"Should not put %s because of %s %s"%( wf, lwfo.name,lwfo.status ), level='critical')
                        print "Should not put",wf,"because of",lwfo.name,lwfo.status
                        cannot_inject.add( wf )
                        can_add = False
            ## add a check on validity of input datasets
            _,prim,par,sec = wfi.getIO()
            for d in list(prim)+list(par)+list(sec):
                if not d in status_cache:
                    status_cache[d] = getDatasetStatus(d)
                if status_cache[d] != 'VALID':
                    wfi.sendLog('injector',"One of the input is not VALID. %s : %s"%( d, status_cache[d]))
                    sendLog('injector',"One of the input of %s is not VALID. %s : %s"%( wf, d, status_cache[d]), level='critical')
                    can_add = False
                #else:
                #    ##make sure that all blocks get closed
                #    closeAllBlocks(url, d)

                ## check for any file in phedex, to verify existence
                _,ph_files,_,_ = getDatasetFiles(url, d)
                if not ph_files and not ( 'StoreResults' == wfi.request.setdefault('RequestType',None) ):
                    wfi.sendLog('injector',"One of the input has no file in phedex: %s" % d )
                    sendLog('injector',"One of the input has no file in phedex: %s"% d, level='critical')
                    can_add = False

            ### ban some workflow that you don't like anymore
            #outputs = wfi.request['OutputDatasets']



            if not can_add: continue

            ## temporary hack to transform specific taskchain into stepchains
            good_for_stepchain = wfi.isGoodToConvertToStepChain( keywords = transform_keywords)
            #good_for_stepchain = wfi.isGoodToConvertToStepChain( keywords = None) 


            ## match keywords and technical constraints
            if (not options.no_convert) and good_for_stepchain and not wfi.isRelval():
                to_convert.add( wf )
                wfi.sendLog('injector','Transforming %s TaskChain into StepChain'%wf)
                sendEmail('conversion to stepchain','Transforming %s TaskChain into StepChain'%wf)

            wfi.sendLog('injector',"considering %s"%wf)

            new_wf = Workflow( name = wf , status = options.setstatus, wm_status = options.wmstatus) 
            session.add( new_wf )
            session.commit()
            time.sleep(0.5)
        else:
            #print "already have",wf
            pass
    

    if cannot_inject:
        #sendEmail('workflow duplicates','These workflow cannot be added in because of duplicates \n\n %s'%( '\n'.join(cannot_inject)))
        sendLog('injector','These workflow cannot be added in because of duplicates \n\n %s'%( '\n'.join(cannot_inject)), level='critical')
        
    for wf in to_convert:
        os.system('./Unified/rejector.py --clone --to_step --comments \"Transform to StepChain\" %s'% wf)

    ## passing a round of invalidation of what needs to be invalidated
    if use_mcm and (options.invalidate or True):
        invalidator(url)

    no_replacement = set()

    #print "getting all transfers"
    #all_transfers=session.query(Transfer).all()
    #print "go!"

    ## pick up replacements
    for wf in session.query(Workflow).filter(Workflow.status == 'trouble').all():
        print wf.name
        if specific and not specific in wf.name: continue
        print wf.name
        wfi = workflowInfo(url, wf.name )
        wl = wfi.request #getWorkLoad(url, wf.name)
        familly = getWorkflowById( url, wl['PrepID'] )
        true_familly = []
        for member in familly:
            if member == wf.name: continue
            fwl = getWorkLoad(url , member)
            if options.replace:
                if member != options.replace: continue
            else:
                if fwl['RequestDate'] < wl['RequestDate']: continue
                if fwl['RequestType']=='Resubmission': continue
                if fwl['RequestStatus'] in ['None',None,'new']: continue
                if fwl['RequestStatus'] in ['rejected','rejected-archived','aborted','aborted-archived']: continue
            true_familly.append( fwl )

        if len(true_familly)==0:
            #sendLog('injector','%s had no replacement'%wf.name, level='critical')
            if wfi.isRelval():
                #wfi.sendLog('injector','the workflow was found in trouble with no replacement. As a relval, there is no clean way to handle this.')
                wfi.sendLog('injector','the workflow was found in trouble with no replacement. As a relval, there is no clean way to handle this. Setting forget')
                wf.status = 'forget'
                session.commit()
            else:
                wfi.sendLog('injector','the workflow was found in trouble with no replacement')
                no_replacement.add( wf.name )
            continue
        else:
            wfi.sendLog('injector','the workflow was found in trouble and has a replacement')
                    
        print wf.name,"has",len(familly),"familly members"
        print wf.name,"has",len(true_familly),"true familly members"

        ##we cannot have more than one of them !!! pick the last one
        if len(true_familly)>1:
            #sendEmail('multiple wf','please take a look at injector for %s'%wf.name)
            sendLog('injector','Multiple wf in line, will take the last one for %s \n%s'%( wf.name, ', '.join(fwl['RequestName'] for fwl in true_familly)), level='critical')

        for fwl in true_familly[-1:]:
            member = fwl['RequestName']
            new_wf = session.query(Workflow).filter(Workflow.name == member).first()
            if not new_wf:
                sendLog('injector',"putting %s as replacement of %s"%( member, wf.name))
                status = 'away'
                if fwl['RequestStatus'] in ['assignment-approved']:
                    status = 'considered'
                new_wf = Workflow( name = member, status = status, wm_status = fwl['RequestStatus'])
                wf.status = 'forget'
                session.add( new_wf ) 
            else:
                if new_wf.status == 'forget': continue
                sendLog('injector',"getting %s as replacement of %s"%( new_wf.name, wf.name ))
                wf.status = 'forget'

            for tr in session.query(TransferImp).filter( TransferImp.workflow_id == wf.id).all():
                ## get all transfer working for the old workflow
                existing = session.query(TransferImp).filter( TransferImp.phedexid == tr.phedexid).filter( TransferImp.workflow_id == new_wf.id).all()
                tr.active = False ## disable the old one
                if not existing:
                    ## create the transfer object for the new dependency
                    tri = TransferImp( phedexid = tr.phedexid,
                                       workflow = new_wf)
                    session.add( tri )
                session.commit()


        ## don't do that automatically
        #wf.status = 'forget'
        session.commit()
    if no_replacement:
        #sendEmail('workflow with no replacement','%s \n are dangling there'%( '\n'.join(no_replacement)))
        sendLog('injector','workflow with no replacement\n%s \n are dangling there'% ( '\n'.join(no_replacement)), level='critical')
Example #5
                d = time.mktime(
                    time.strptime("-".join(map(str, usor['RequestDate'])),
                                  "%Y-%m-%d-%H-%M-%S"))
                secondary_timeout[dataset] = max(secondary_timeout[dataset], d)

        if secondary_timeout[dataset]:  ## different than zero
            delay_days = 30
            delay = delay_days * 24 * 60 * 60  # 30 days
            if (now - secondary_timeout[dataset]) > delay:
                print "unlocking secondary input after", delay_days, "days"
                unlock = True

        tier = dataset.split('/')[-1]
        creators = getWorkflowByOutput(url, dataset, details=True)
        if not creators and not tier == 'RAW':
            ds_status = getDatasetStatus(dataset)
            if not '-v0/' in dataset and ds_status != None:
                sendEmail('failing get by output',
                          '%s has not been produced by anything?' % dataset)
                newly_locking.add(dataset)
                continue
            else:
                # does not matter, cannot be an OK dataset
                unlock = True
                bad_ds = True
        creators_status = [r['RequestStatus'] for r in creators]
        print "Statuses of workflow that made the dataset", dataset, "are", creators_status
        if all([
                status in [
                    'failed', 'aborted', 'rejected', 'aborted-archived',
                    'rejected-archived'
                ] for status in creators_status
        ]):
            ## crap
            print "\tunlocking", dataset, "for bad workflow statuses"
            unlock = True
            bad_ds = True
Example #6
    def run(self):
        site = self.site
        print "checking on site", site
        si = self.SI
        UC = self.UC
        RDI = self.RDI
        options = self.options
        locks = self.locks
        waiting = self.waiting
        stuck = self.stuck
        missing = self.missing
        remainings = {}

        ds = si.getRemainingDatasets(si.CE_to_SE(site))
        #print len(ds)
        taken_size = 0.
        sum_waiting = 0.
        sum_stuck = 0.
        sum_missing = 0.
        sum_unlocked = 0.
        n_ds = options.ndatasets
        i_ds = 0
        ds_threads = []
        for i_ds, (size, dataset) in enumerate(ds):
            if n_ds and i_ds >= n_ds: break
            remainings[dataset] = {"size": size, "reasons": []}
            #print "-"*10
            if not dataset in locks:
                #print dataset,"is not locked"
                sum_unlocked += size
                remainings[dataset]["reasons"].append('unlock')
            else:
                remainings[dataset]["reasons"].append('lock')
            if dataset in waiting:
                #print dataset,"is waiting for custodial"
                sum_waiting += size
                remainings[dataset]["reasons"].append('tape')

            if dataset in stuck:
                sum_stuck += size
                remainings[dataset]["reasons"].append('stuck-tape')
            if dataset in missing:
                sum_missing += size
                remainings[dataset]["reasons"].append('missing-tape')

            ds_threads.append(DatasetCheckBuster(dataset=dataset, url=url))

        run_threads = ThreadHandler(threads=ds_threads,
                                    label='%s Dataset Threads' % site,
                                    n_threads=10,
                                    start_wait=0,
                                    timeout=None,
                                    verbose=True)
        ## start and sync
        run_threads.run()
        #run_threads.start()
        #while run_threads.is_alive():
        #    time.sleep(10)

        for t in run_threads.threads:
            remainings[t.dataset]["reasons"].extend(t.reasons)
            remainings[t.dataset]["reasons"].sort()
            print t.dataset, remainings[t.dataset]["reasons"]

        #print "\t",sum_waiting,"[GB] could be freed by custodial"
        print "\t", sum_unlocked, "[GB] is not locked by unified"

        print "updating database with remaining datasets"
        RDI.set(site, remainings)
        try:
            eosFile('%s/remaining_%s.json' % (monitor_dir, site),
                    'w').write(json.dumps(remainings, indent=2)).close()
        except Exception:
            ## best effort only: the EOS copy of the json is not critical
            pass

        ld = remainings.items()
        ld.sort(key=lambda i: i[1]['size'], reverse=True)
        table = "<html>Updated %s GMT, <a href=remaining_%s.json>json data</a><br>" % (
            time.asctime(time.gmtime()), site)

        accumulate = defaultdict(lambda: defaultdict(float))
        for item in remainings:
            tier = item.split('/')[-1]

            for reason in remainings[item]['reasons']:
                accumulate[reason][tier] += remainings[item]['size']
        table += "<table border=1></thead><tr><th>Reason</th><th>size [TB]</th></thead>"
        for reason in accumulate:
            s = 0
            table += "<tr><td>%s</td><td><ul>" % reason
            subitems = accumulate[reason].items()
            subitems.sort(key=lambda i: i[1], reverse=True)

            for tier, ss in subitems:
                table += "<li> %s : %10.3f</li>" % (tier, ss / 1024.)
                s += ss / 1024.
            table += "</ul>total : %.3f</td>" % s

        table += "</table>\n"
        table += "<table border=1></thead><tr><th>Dataset</th><th>Size [GB]</th><th>Label</th></tr></thead>\n"
        only_unlock = set()
        for item in ld:
            ds_name = item[0]
            reasons = item[1]['reasons']
            sub_url = '<a href="https://cmsweb.cern.ch/das/request?input=%s">%s</a>' % (
                ds_name, ds_name)
            if 'unlock' in reasons:
                sub_url += ', <a href="https://cmsweb.cern.ch/phedex/datasvc/xml/prod/subscriptions?block=%s%%23*&node=%s">block</a>' % (
                    ds_name, site)
            if 'unlock' in reasons or 'input' in reasons:
                sub_url += ', <a href="https://cmsweb.cern.ch/reqmgr2/data/request?inputdataset=%s&mask=RequestName&mask=RequestStatus">input</a>' % (
                    ds_name)
            if 'unlock' in reasons or 'output' in reasons:
                sub_url += ', <a href="https://cmsweb.cern.ch/reqmgr2/data/request?outputdataset=%s&mask=RequestName&mask=RequestStatus">output</a>' % (
                    ds_name)
            if 'pilup' in reasons:
                sub_url += ', <a href="https://cmsweb.cern.ch/reqmgr2/data/request?mc_pileup=%s&mask=RequestName&mask=RequestStatus">secondary</a>' % (
                    ds_name)
            table += "<tr><td>%s</td><td>%d</td><td><ul>%s</ul></td></tr>\n" % (
                sub_url, item[1]['size'], "<li>".join([""] + reasons))
            if reasons == ['unlock']:
                only_unlock.add(item[0])
        table += "</table></html>"
        eosFile('%s/remaining_%s.html' % (monitor_dir, site),
                'w').write(table).close()

        print "checking on unlock only datasets"
        to_ddm = UC.get('tiers_to_DDM')
        #look_at = list(only_unlock)
        look_at = list(only_unlock)[:20]
        #look_at = list([ds for ds in only_unlock if not ds.endswith('NANOAODSIM')])
        for item in look_at:
            tier = item.split('/')[-1]
            ds_status = getDatasetStatus(item)
            print item, ds_status
            if ds_status == 'PRODUCTION':
                print item, "is found", ds_status, "and unklocked on", site
                if options.invalidate_anything_left_production_once_unlocked:
                    print "Setting status to invalid for", item
                    setDatasetStatus(item, 'INVALID')
            if tier in to_ddm:
                print item, "looks like analysis and still dataops on", site
                if options.change_dataops_subs_to_anaops_once_unlocked:
                    print "Sending", item, "to anaops"
                    allCompleteToAnaOps(url, item)
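
A toy sketch of the per-reason, per-tier accumulation that feeds the HTML summary above, with invented dataset names and sizes; the real remainings dict is built from the site inventory earlier in run().

# toy illustration of the accumulation step above (sample entries are made up)
from collections import defaultdict

remainings = {
    '/SomePD/SomeCampaign-v1/AODSIM': {'size': 1200., 'reasons': ['unlock']},
    '/OtherPD/SomeCampaign-v1/MINIAODSIM': {'size': 300., 'reasons': ['lock', 'tape']},
}

accumulate = defaultdict(lambda: defaultdict(float))
for item in remainings:
    tier = item.split('/')[-1]
    for reason in remainings[item]['reasons']:
        accumulate[reason][tier] += remainings[item]['size']

for reason in accumulate:
    print reason, dict(accumulate[reason])    # e.g. unlock {'AODSIM': 1200.0}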
Example #7
def injector(url, options, specific):

    use_mcm = True
    up = componentInfo( mcm = use_mcm, soft=['mcm'] )
    if not up.check(): return
    use_mcm = up.status['mcm']

    workflows = getWorkflows(url, status=options.wmstatus, user=options.user)
    workflows.extend( getWorkflows(url, status=options.wmstatus, user='******', rtype="ReReco")) ## regardless of users, pick up all ReReco on the table

    print len(workflows),"in line"
    cannot_inject = set()
    status_cache = defaultdict(str)
    ## browse assignment-approved requests, compare with the ones we already track, and insert the difference
    for wf in workflows:
        if specific and not specific in wf: continue
        exists = session.query(Workflow).filter(Workflow.name == wf ).first()
        if not exists:
            wfi = workflowInfo(url, wf)
            #wl = getWorkLoad(url, wf)
            ## check first that there isn't a related workflow already tracked with a valid status
            can_add = True
            ## first try at finding a match
            #            print wfi.request
            familly = session.query(Workflow).filter(Workflow.name.contains(wfi.request['PrepID'])).all()
            if not familly:
                #req_familly = getWorkflowById( url, wl['PrepID'])
                #familly = [session.query(Workflow).filter(Workflow.name == member).first() for member in req_familly]
                pids = wfi.getPrepIDs()
                req_familly = []
                for pid in pids:
                    req_familly.extend( getWorkflowById( url, pid, details=True) )
                    
                familly = []
                print len(req_familly),"members"
                for req_member in req_familly:
                    #print "member",req_member['RequestName']
                    owfi = workflowInfo(url, req_member['RequestName'], request=req_member)
                    other_pids = owfi.getPrepIDs()
                    if set(pids) == set(other_pids):
                        ## this is a real match
                        familly.extend( session.query(Workflow).filter(Workflow.name == req_member['RequestName']).all() )

            for lwfo in familly:
                if lwfo:
                    ## we have it already
                    if not lwfo.status in ['forget','trouble','forget-unlock','forget-out-unlock']:
                        sendLog('injector',"Should not put %s because of %s %s"%( wf, lwfo.name,lwfo.status ))
                        print "Should not put",wf,"because of",lwfo.name,lwfo.status
                        cannot_inject.add( wf )
                        can_add = False
            ## add a check on validity of input datasets
            _,prim,par,sec = wfi.getIO()
            for d in list(prim)+list(par)+list(sec):
                if not d in status_cache:
                    status_cache[d] = getDatasetStatus(d)
                if status_cache[d] != 'VALID':
                    wfi.sendLog('injector',"One of the input is not VALID. %s : %s"%( d, status_cache[d]))
                    sendLog('injector',"One of the input of %s is not VALID. %s : %s"%( wf, d, status_cache[d]))
                    can_add = False
            if not can_add: continue
            wfi.sendLog('injector',"considering %s"%wf)

            new_wf = Workflow( name = wf , status = options.setstatus, wm_status = options.wmstatus) 
            session.add( new_wf )
            session.commit()
            time.sleep(0.5)
        else:
            #print "already have",wf
            pass
    
    if cannot_inject:
        #sendEmail('workflow duplicates','These workflow cannot be added in because of duplicates \n\n %s'%( '\n'.join(cannot_inject)))
        sendLog('injector','These workflow cannot be added in because of duplicates \n\n %s'%( '\n'.join(cannot_inject)), level='warning')

    ## passing a round of invalidation of what needs to be invalidated
    if use_mcm and (options.invalidate or True):
        invalidator(url)

    no_replacement = set()

    ## pick up replacements
    for wf in session.query(Workflow).filter(Workflow.status == 'trouble').all():
        print wf.name
        if specific and not specific in wf.name: continue
        print wf.name
        wfi = workflowInfo(url, wf.name )
        wl = wfi.request #getWorkLoad(url, wf.name)
        familly = getWorkflowById( url, wl['PrepID'] )
        true_familly = []
        for member in familly:
            if member == wf.name: continue
            fwl = getWorkLoad(url , member)
            if options.replace:
                if member != options.replace: continue
            else:
                if fwl['RequestDate'] < wl['RequestDate']: continue
                if fwl['RequestType']=='Resubmission': continue
                if fwl['RequestStatus'] in ['None',None,'new']: continue
                if fwl['RequestStatus'] in ['rejected','rejected-archived','aborted','aborted-archived']: continue
            true_familly.append( fwl )

        if len(true_familly)==0:
            #sendLog('injector','%s had no replacement'%wf.name, level='critical')
            wfi.sendLog('injector','the workflow was found in trouble with no replacement')
            no_replacement.add( wf.name )
            continue
        else:
            wfi.sendLog('injector','the workflow was found in trouble and has a replacement')
                    
        print wf.name,"has",len(familly),"familly members"
        print wf.name,"has",len(true_familly),"true familly members"

        ##we cannot have more than one of them !!! pick the last one
        if len(true_familly)>1:
            #sendEmail('multiple wf','please take a look at injector for %s'%wf.name)
            sendLog('injector','Multiple wf in line, will take the last one for %s \n%s'%( wf.name, ', '.join(fwl['RequestName'] for fwl in true_familly)), level='critical')

        for fwl in true_familly[-1:]:
            member = fwl['RequestName']
            new_wf = session.query(Workflow).filter(Workflow.name == member).first()
            if not new_wf:
                sendLog('injector',"putting %s as replacement of %s"%( member, wf.name))
                status = 'away'
                if fwl['RequestStatus'] in ['assignment-approved']:
                    status = 'considered'
                new_wf = Workflow( name = member, status = status, wm_status = fwl['RequestStatus'])
                wf.status = 'forget'
                session.add( new_wf ) 
            else:
                if new_wf.status == 'forget': continue
                sendLog('injector',"getting %s as replacement of %s"%( new_wf.name, wf.name ))
                wf.status = 'forget'

            for tr in session.query(Transfer).all():
                if wf.id in tr.workflows_id:
                    sw = copy.deepcopy(tr.workflows_id)
                    sw.remove( wf.id)
                    sw.append(new_wf.id)
                    tr.workflows_id = sw
                    print tr.phedexid,"got",new_wf.name
                    if new_wf.status != 'away':
                        print "\t setting it considered"
                        new_wf.status = 'considered'
                    if tr.phedexid<0: ## set it back to positive
                        tr.phedexid = -tr.phedexid
                    session.commit()
                        

        ## don't do that automatically
        #wf.status = 'forget'
        session.commit()
    if no_replacement:
        #sendEmail('workflow with no replacement','%s \n are dangling there'%( '\n'.join(no_replacement)))
        sendLog('injector','workflow with no replacement, %s \n are dangling there'% ( '\n'.join(no_replacement)), level='critical')
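
A toy sketch of the transfer re-pointing done above for the older Transfer table (a list of workflow ids per phedex request): the old workflow id is swapped for its replacement and a negative, parked phedexid is flipped back to positive. The Transfer row is mimicked with a plain dict here.

# toy illustration of re-pointing a transfer to the replacement workflow
import copy

tr = {'phedexid': -12345, 'workflows_id': [11, 42, 57]}   # stand-in for a Transfer row
old_id, new_id = 42, 99                                    # wf.id and new_wf.id

sw = copy.deepcopy(tr['workflows_id'])
sw.remove(old_id)
sw.append(new_id)
tr['workflows_id'] = sw
if tr['phedexid'] < 0:            # set a parked phedexid back to positive
    tr['phedexid'] = -tr['phedexid']
print tr                          # {'phedexid': 12345, 'workflows_id': [11, 57, 99]}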
Example #8
def outcleanor(url, options):

    if options.approve:
        for user in ['*Vlimant']:  #,'*Cremonesi']:
            deletes = listDelete(url, user=user)
            for (site, who, tid) in deletes:
                if 'MSS' in site: continue  ### ever
                print site, who, tid
                print "approving deletion"
                print approveSubscription(
                    url,
                    tid,
                    nodes=[site],
                    comments='Production cleaning by data ops')
        return

    sites_and_datasets = defaultdict(list)
    our_copies = defaultdict(list)
    wf_cleaned = {}

    wfs = []
    for fetch in options.fetch.split(','):
        wfs.extend(
            session.query(Workflow).filter(Workflow.status == fetch).all())

    random.shuffle(wfs)
    last_answer = None
    for wfo in wfs:
        if options.number and len(wf_cleaned) >= options.number:
            print "Reached", options.number, "cleaned"
            break
        print '-' * 100
        wfi = workflowInfo(url, wfo.name)
        goes = {}  # boolean per output
        for dataset in wfi.request['OutputDatasets']:
            goes[dataset] = False
            keep_one_out = True
            status = getDatasetStatus(dataset)
            print "\n\tLooking at", dataset, status, "\n"
            vetoes = None
            if status == 'INVALID':
                vetoes = ['Export', 'Buffer']  ## can take themselves out
                keep_one_out = False  # just wipe clean

            elif status == None:
                print dataset, "actually does not exist. skip"
                goes[dataset] = True
                continue

            elif status in ['PRODUCTION', 'VALID'
                            ] and wfo.status in ['forget', 'trouble']:
                print dataset, "should probably be invalidated. (", wfo.status, ") skip"
                keep_one_out = False  # just wipe clean
                continue  ## you are not sure. just skip it for the time being

            elif status == 'PRODUCTION' and wfo.status in ['clean']:
                print dataset, "should probably be set valid .skip"
                continue  ## you are not sure. just skip it for the time being

            if status == 'VALID' and dataset.startswith('/MinBias'):
                print "This is a /MinBias. skip"
                continue

            if '/DQM' in dataset:
                keep_one_out = False

            total_size = getDatasetSize(dataset)

            our_presence = getDatasetPresence(url,
                                              dataset,
                                              complete=None,
                                              group="DataOps",
                                              vetoes=vetoes)
            also_our_presence = getDatasetPresence(url,
                                                   dataset,
                                                   complete=None,
                                                   group="",
                                                   vetoes=vetoes)

            ## merge in one unique dict
            for site in also_our_presence:
                if site in our_presence:
                    there, frac = our_presence[site]
                    other, ofrac = also_our_presence[site]
                    our_presence[site] = (max(there, other), max(frac, ofrac))
                else:
                    our_presence[site] = also_our_presence[site]

            if our_presence: print our_presence

            ## analysis ops copies need to be taken into account
            anaops_presence = getDatasetPresence(url,
                                                 dataset,
                                                 complete=None,
                                                 group="AnalysisOps")
            own_by_anaops = anaops_presence.keys()

            ## all our copies
            to_be_cleaned = our_presence.keys()
            if not len(to_be_cleaned):
                print "nowhere to be found of ours,", len(
                    own_by_anaops), "in analysi ops pool"
                goes[dataset] = True
                continue

            print "Where we own bits of dataset"
            print to_be_cleaned

            if len(own_by_anaops):
                ## remove site with the anaops copies
                to_be_cleaned = list(set(to_be_cleaned) - set(own_by_anaops))
                keep_one_out = False  ## in that case, just remove our copies
                print "Own by anaops (therefore not keep a copy of ours)"
                print own_by_anaops
            else:
                ## we should not be looking at anything that was not passed to DDM, otherwise we'll be cutting the grass under our feet
                using_the_same = getWorkflowByInput(url, dataset, details=True)
                conflict = False
                for other in using_the_same:
                    if other['RequestName'] == wfo.name: continue
                    if other['RequestType'] == 'Resubmission': continue
                    if not other['RequestStatus'] in [
                            'announced', 'normal-archived', 'aborted',
                            'rejected', 'aborted-archived',
                            'rejected-archived', 'closed-out', 'None', None
                    ]:
                        print other['RequestName'], 'is in status', other[
                            'RequestStatus'], 'preventing from cleaning', dataset
                        conflict = True
                        break
                if conflict:
                    continue

                ## not being used. a bit less dangerous to clean-out
                ## keep one full copy out there
                full_copies = [
                    site for (site, (there, fract)) in our_presence.items()
                    if there
                ]
                if keep_one_out:
                    if not len(full_copies):
                        print "we do not own a full copy of", dataset, status, wfo.status, ".skip"
                        continue
                    stay_there = random.choice(
                        full_copies)  #at a place own by ops
                    print "Where we keep a full copy", stay_there
                    to_be_cleaned.remove(stay_there)
                    our_copies[stay_there].append(dataset)
                else:
                    print "We do not want to keep a copy of ", dataset, status, wfo.status

            if len(to_be_cleaned):
                print "Where we can clean"
                print to_be_cleaned
                for site in to_be_cleaned:
                    sites_and_datasets[site].append(
                        (dataset, total_size * our_presence[site][1] / 100.,
                         status))
                goes[dataset] = True
            else:
                print "no cleaning to be done"
                goes[dataset] = True

        print wfo.name, "scrutinized"
        if all(goes.values()):
            print "\t", wfo.name, "can toggle -out"

        ## ask interactively unless running in auto mode; keep the answer in
        ## last_answer so the checks below (and after the loop) see it
        if not options.auto:
            last_answer = raw_input('go on ?')

        if options.auto or last_answer in ['y', '']:
            if all(goes.values()):
                wfo.status = wfo.status + '-out'
                wf_cleaned[wfo.name] = wfo.status
            continue
        elif last_answer in ['q', 'n']:
            break
        else:
            return

    if options.auto:
        pass
    elif last_answer in ['q']:
        return

    print "Potential cleanups"
    for (site, items) in sites_and_datasets.items():
        cleanup = sum([size for (_, size, _) in items])
        print "\n\t potential cleanup of", "%8.4f" % cleanup, "GB at ", site
        print "\n".join([ds + " " + st for ds, _, st in items])
        datasets = [ds for ds, _, st in items]

    print "Copies and bits we are going to delete"
    print json.dumps(sites_and_datasets, indent=2)

    print "Copies we are keeping"
    print json.dumps(our_copies, indent=2)

    print "Workflows cleaned for output"
    print json.dumps(wf_cleaned, indent=2)
    stamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
    open('outcleaning_%s.json' % stamp,
         'w').write(json.dumps(sites_and_datasets, indent=2))
    open('keepcopies_%s.json' % stamp,
         'w').write(json.dumps(our_copies, indent=2))
    open('wfcleanout_%s.json' % stamp,
         'w').write(json.dumps(wf_cleaned, indent=2))

    if (not options.test) and (options.auto or raw_input(
            "Satisfied ? (y will trigger status change and deletion requests)")
                               in ['y']):
        for (site, items) in sites_and_datasets.items():
            datasets = [ds for ds, _, st in items]
            print "making deletion to", site
            result = makeDeleteRequest(
                url, site, datasets,
                "Cleanup output after production. DataOps will take care of approving it."
            )
            print result
            ## approve it right away ?
            if 'MSS' in site: continue
            if 'Export' in site: continue
            if 'Buffer' in site: continue
            for did in [
                    item['id'] for item in result['phedex']['request_created']
            ]:
                print "auto-approve disabled, but ready"
                #approveSubscription(url, did, nodes = [site], comments = 'Auto-approving production cleaning deletion')
                pass
        session.commit()
    else:
        print "Not making the deletion and changing statuses"
Example #9
0
        if secondary_timeout[dataset]:  ## i.e. a non-zero timestamp
            delay_days = 30
            delay = delay_days * 24 * 60 * 60  # 30 days
            if (now - secondary_timeout[dataset]) > delay:
                print "unlocking secondary input after", delay_days, "days"
                unlock = True
            else:
                print "keep a lock on secondary within", delay_days, "days"
                unlock = False
                newly_locking.add(dataset)
                continue

        tier = dataset.split("/")[-1]
        creators = getWorkflowByOutput(url, dataset, details=True)
        if not creators and tier != "RAW" and "-PromptReco-" not in dataset:
            ds_status = getDatasetStatus(dataset)
            if "-v0/" not in dataset and ds_status is not None:
                # sendEmail('failing get by output','%s has not been produced by anything?'%dataset)
                sendLog(
                    "lockor", "failing get by output, %s has not been produced by anything?" % dataset, level="critical"
                )
                newly_locking.add(dataset)
                continue
            else:
                # does not matter, cannot be an OK dataset
                unlock = True
                bad_ds = True
        creators_status = [r["RequestStatus"] for r in creators]
        print "Statuses of workflow that made the dataset", dataset, "are", creators_status
        if len(creators_status) and all(
            [
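Example #9 keeps a lock on secondary (pileup) input for a grace period before unlocking it. A small sketch of that timeout test, assuming secondary_timeout[dataset] holds the unix timestamp at which the dataset was first flagged for unlocking (the helper name is illustrative):

import time

def secondary_unlock_due(first_seen, delay_days=30, now=None):
    # True once the grace period has elapsed since the dataset was first flagged
    if now is None:
        now = time.time()
    return (now - first_seen) > delay_days * 24 * 60 * 60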
Example #10
0
    def run(self):
        site = self.site
        print "checking on site",site
        si = self.SI
        UC = self.UC
        RDI = self.RDI
        options = self.options
        locks = self.locks
        waiting = self.waiting
        stuck = self.stuck
        missing = self.missing
        remainings = {}
        
        ds = si.getRemainingDatasets(si.CE_to_SE(site))
        #print len(ds)
        taken_size=0.
        sum_waiting=0.
        sum_stuck=0.
        sum_missing=0.
        sum_unlocked=0.
        n_ds = options.ndatasets
        i_ds = 0
        ds_threads = []
        for i_ds,(size,dataset) in enumerate(ds):
            if n_ds and i_ds>=n_ds: break
            remainings[dataset] = {"size" : size, "reasons": []}
            #print "-"*10
            if not dataset in locks:
                #print dataset,"is not locked"
                sum_unlocked += size
                remainings[dataset]["reasons"].append('unlock')
            else:
                remainings[dataset]["reasons"].append('lock')
            if dataset in waiting:
                #print dataset,"is waiting for custodial"
                sum_waiting+=size
                remainings[dataset]["reasons"].append('tape')

            if dataset in stuck:
                sum_stuck+=size
                remainings[dataset]["reasons"].append('stuck-tape')
            if dataset in missing:
                sum_missing +=size
                remainings[dataset]["reasons"].append('missing-tape')

            ds_threads.append( DatasetCheckBuster( dataset = dataset,
                                                   url = url))

        
        run_threads = ThreadHandler( threads = ds_threads,
                                     label = '%s Dataset Threads'%site,
                                     n_threads = 10 ,
                                     start_wait = 0,
                                     timeout = None,
                                     verbose=True)
        ## start and sync
        run_threads.run()
        #run_threads.start()
        #while run_threads.is_alive():
        #    time.sleep(10)        

        for t in run_threads.threads:
            remainings[t.dataset]["reasons"].extend( t.reasons )
            remainings[t.dataset]["reasons"].sort()
            print t.dataset,remainings[t.dataset]["reasons"]

        #print "\t",sum_waiting,"[GB] could be freed by custodial"
        print "\t",sum_unlocked,"[GB] is not locked by unified"

        print "updating database with remaining datasets"
        RDI.set(site, remainings)
        try:
            eosFile('%s/remaining_%s.json'%(monitor_dir,site),'w').write( json.dumps( remainings , indent=2)).close()
        except:
            pass

        ld = remainings.items()
        ld.sort( key = lambda i:i[1]['size'], reverse=True)
        table = "<html>Updated %s GMT, <a href=remaining_%s.json>json data</a><br>"%(time.asctime(time.gmtime()),site)

        accumulate = defaultdict(lambda : defaultdict(float))
        for item in remainings:
            tier = item.split('/')[-1]

            for reason in remainings[item]['reasons']:
                accumulate[reason][tier] += remainings[item]['size']
        table += "<table border=1></thead><tr><th>Reason</th><th>size [TB]</th></thead>"
        for reason in accumulate:
            s=0
            table += "<tr><td>%s</td><td><ul>"% reason
            subitems = accumulate[reason].items()
            subitems.sort(key = lambda i:i[1], reverse=True)

            for tier,ss in subitems:
                table += "<li> %s : %10.3f</li>"%( tier, ss/1024.)
                s+=  ss/1024.
            table+="</ul>total : %.3f</td>"%s

        table += "</table>\n"
        table += "<table border=1></thead><tr><th>Dataset</th><th>Size [GB]</th><th>Label</th></tr></thead>\n"
        only_unlock = set()
        for item in ld:
            ds_name = item[0]
            reasons = item[1]['reasons']
            sub_url = '<a href="https://cmsweb.cern.ch/das/request?input=%s">%s</a>'%(ds_name, ds_name)
            if 'unlock' in reasons:
                sub_url += ', <a href="https://cmsweb.cern.ch/phedex/datasvc/xml/prod/subscriptions?block=%s%%23*&node=%s">block</a>'%(ds_name, site)
            if 'unlock' in reasons or 'input' in reasons:
                sub_url += ', <a href="https://cmsweb.cern.ch/reqmgr2/data/request?inputdataset=%s&mask=RequestName&mask=RequestStatus">input</a>'%(ds_name)
            if 'unlock' in reasons or 'output' in reasons:
                sub_url += ', <a href="https://cmsweb.cern.ch/reqmgr2/data/request?outputdataset=%s&mask=RequestName&mask=RequestStatus">output</a>'%(ds_name)
            if 'pilup' in reasons:
                sub_url += ', <a href="https://cmsweb.cern.ch/reqmgr2/data/request?mc_pileup=%s&mask=RequestName&mask=RequestStatus">secondary</a>'%(ds_name)                
            table+="<tr><td>%s</td><td>%d</td><td><ul>%s</ul></td></tr>\n"%( sub_url, item[1]['size'], "<li>".join([""]+reasons))
            if reasons==['unlock']:
                only_unlock.add(item[0])
        table+="</table></html>"
        eosFile('%s/remaining_%s.html'%(monitor_dir,site),'w').write( table ).close()

        print "checking on unlock only datasets"
        to_ddm = UC.get('tiers_to_DDM')
        #look_at = list(only_unlock)
        look_at = list(only_unlock)[:20]
        #look_at = list([ds for ds in only_unlock if not ds.endswith('NANOAODSIM')])
        for item in look_at:
            tier = item.split('/')[-1]
            ds_status = getDatasetStatus(item)
            print item,ds_status
            if ds_status == 'PRODUCTION':
                print item,"is found",ds_status,"and unklocked on",site
                if options.invalidate_anything_left_production_once_unlocked:
                    print "Setting status to invalid for",item
                    setDatasetStatus(item, 'INVALID')
            if tier in to_ddm:
                print item,"looks like analysis and still dataops on",site
                if options.change_dataops_subs_to_anaops_once_unlocked:
                    print "Sending",item,"to anaops"
                    allCompleteToAnaOps(url, item)
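The tail of run() applies two optional actions to datasets flagged only as 'unlock'. A hedged sketch of that decision as a pure function, mirroring the option flags used above (the function name and return values are illustrative):

def unlock_only_actions(ds_status, tier, tiers_to_ddm, options):
    actions = []
    # a dataset still in PRODUCTION but no longer locked can optionally be invalidated
    if ds_status == 'PRODUCTION' and options.invalidate_anything_left_production_once_unlocked:
        actions.append('invalidate')   # i.e. setDatasetStatus(item, 'INVALID')
    # analysis-like tiers can optionally be handed from DataOps to AnaOps
    if tier in tiers_to_ddm and options.change_dataops_subs_to_anaops_once_unlocked:
        actions.append('to-anaops')    # i.e. allCompleteToAnaOps(url, item)
    return actions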