# Example no. 1
# 0
def getOutputEvents(dbsApi, dset, verb=False):
    """
    Return the event count of an output dataset, or -10 when the dataset
    name contains one of the data tiers that cannot be checked.

    dbsApi -- handle passed straight through to getRelValDsetNames.getNumEvents
    dset   -- dataset name to look up
    verb   -- accepted for interface compatibility; not used in this function
    """
    # completion cannot be calculated for ALCARECO, DQMIO and DQMROOT samples
    skippedTiers = ('/ALCARECO', '/DQMIO', '/DQMROOT')
    for tier in skippedTiers:
        if tier in dset:
            return -10

    return getRelValDsetNames.getNumEvents(dbsApi, dset)
def getOutputEvents(dbsApi, dset, verb=False):
    """
    Look up how many events an output dataset holds.

    Returns -10 without any lookup when '/ALCARECO', '/DQMIO' or '/DQMROOT'
    occurs in dset; otherwise returns whatever
    getRelValDsetNames.getNumEvents(dbsApi, dset) returns.
    The verb argument is not used here.
    """
    # these data tiers cannot be used to calculate completion
    tierNotAllowed = any(
        marker in dset for marker in ('/ALCARECO', '/DQMIO', '/DQMROOT'))
    if tierNotAllowed:
        return -10

    numEvents = getRelValDsetNames.getNumEvents(dbsApi, dset)
    return numEvents
# Example no. 3
# 0
def main():
    parser = optparse.OptionParser()
    parser.add_option(
        '--test',
        action="store_true",
        help=
        'Nothing is closed out. Only test if the workflows are ready to be closed out.',
        dest='test')
    parser.add_option(
        '--verbose',
        action="store_true",
        help=
        'Print out details about the number of events expected and produced.',
        dest='verbose')
    parser.add_option('--correct_env', action="store_true", dest='correct_env')
    (options, args) = parser.parse_args()

    command = ""
    for arg in sys.argv:
        command = command + arg + " "

    if not options.correct_env:
        os.system(
            "source /cvmfs/grid.cern.ch/emi-ui-3.7.3-1_sl6v2/etc/profile.d/setup-emi3-ui-example.sh; export X509_USER_PROXY=/tmp/x509up_u13536; source /tmp/relval/sw/comp.pre/slc6_amd64_gcc481/cms/dbs3-client/3.2.8a/etc/profile.d/init.sh; python2.6 "
            + command + "--correct_env")
        sys.exit(0)

    if len(args) != 1:
        print "Usage:"
        print "python closeOutTaskChainWorkflows.py [--test] [--verbose] <inputFile_containing_a_list_of_workflows>"
        sys.exit(0)

    inputFile = args[0]
    f = open(inputFile, 'r')

    closedOut = []
    nonClosedOut = []
    tooManyEventsOrLumis = []
    running = []

    dbsApi = getRelValDsetNames.getDBSApi()

    for line in f:
        workflow = line.rstrip('\n')
        if options.verbose:
            print "checking workflow " + workflow
        schema = getRequestJson(workflow)
        if schema['RequestType'] != 'TaskChain':
            print "workflow type is not TaskChain, exiting"
            sys.exit(0)

        outputDatasets = getRelValDsetNames.getOutputDset(workflow)
        #        pprint.pprint(outputDatasets)
        # We should never hit this case
        if 'RequestNumEvents' in schema['Task1'] and 'InputDataset' in schema[
                'Task1']:
            print "Request cannot have both RequestNumEvents and InputDataset in Task1, exiting..."
            sys.exit(1)

        # Check whether it's FastSim or FullSim from scratch
        if 'RequestNumEvents' in schema['Task1']:
            inputEvents = schema['Task1']['RequestNumEvents']
            #            if options.verbose:
            #                print "DEBUG: RequestNumEvents: %d" % inputEvents

            closeOut = True
            tooMany = False
            for dataset in outputDatasets:
                outputEvents = 0

                outputEvents = getOutputEvents(dbsApi,
                                               dataset,
                                               verb=options.verbose)
                if options.verbose:
                    successRate = outputEvents / float(inputEvents)
                    print "  %-110s\t%d\t%.1f%%" % (dataset, outputEvents,
                                                    successRate * 100)

                if outputEvents == -10:
                    continue
                elif outputEvents == inputEvents:
                    pass
                elif outputEvents < inputEvents:
                    closeOut = False
                elif outputEvents > inputEvents:
                    closeOut = False
                    tooMany = True

            if closeOut:
                closedOut.append(workflow)
            else:
                nonClosedOut.append(workflow)
            if tooMany:
                tooManyEventsOrLumis.append(workflow)

        # Then it's either Data or MC recycling
        elif 'InputDataset' in schema['Task1']:
            inputDset = schema['Task1']['InputDataset']

            # It's Data
            if 'RunWhitelist' in schema['Task1']:
                closeOut = True
                tooMany = False

                runList = schema['Task1']['RunWhitelist']
                #                if options.verbose:
                #                    print "DEBUG: InputDset %s and runList is %r" % (inputDset, runList)

                inputEvents = getEventsDataSetRunList(dbsApi,
                                                      inputDset,
                                                      runList,
                                                      verb=options.verbose)
                #                if options.verbose:
                #                    print "DEBUG: InputDset %s and %d events" % (inputDset, inputEvents)

                for dataset in outputDatasets:
                    outputEvents = 0

                    outputEvents = getOutputEvents(dbsApi,
                                                   dataset,
                                                   verb=options.verbose)
                    if options.verbose:
                        successRate = outputEvents / float(inputEvents)
                        print "  %-110s\t%d\t%.1f%%" % (dataset, outputEvents,
                                                        successRate * 100)

                    if outputEvents == -10:
                        continue
                    elif outputEvents == inputEvents:
                        pass
                    elif outputEvents < inputEvents:
                        closeOut = False
                    elif outputEvents > inputEvents:
                        #print outputEvents
                        #print inputEvents
                        closeOut = False
                        tooMany = True

                if closeOut:
                    closedOut.append(workflow)
                else:
                    nonClosedOut.append(workflow)
                if tooMany:
                    tooManyEventsOrLumis.append(workflow)

            elif 'BlockWhitelist' in schema['Task1']:
                print "TODO: you need to code me to handle block white list"
                nonClosedOut.append(workflow)
                break

            # Most likely MC, since there is no run whitelist
            # it means we can just go for num of events
            else:
                closeOut = True
                tooMany = False

                inputEvents = getRelValDsetNames.getNumEvents(
                    dbsApi, inputDset)
                #                if options.verbose:
                #                    print "DEBUG: InputDset %s and %d events" % (inputDset, inputEvents)

                for dataset in outputDatasets:
                    outputEvents = 0

                    outputEvents = getOutputEvents(dbsApi,
                                                   dataset,
                                                   verb=options.verbose)
                    if options.verbose:
                        successRate = outputEvents / float(inputEvents)
                        print "  %-110s\t%d\t%.1f%%" % (dataset, outputEvents,
                                                        successRate * 100)

                    if outputEvents == -10:
                        continue
                    elif outputEvents == inputEvents:
                        pass
                    elif outputEvents < inputEvents:
                        closeOut = False
                    elif outputEvents > inputEvents:
                        closeOut = False
                        tooMany = True

                if closeOut:
                    closedOut.append(workflow)
                else:
                    nonClosedOut.append(workflow)
                if tooMany:
                    tooManyEventsOrLumis.append(workflow)

    print '-----------------------------------------------------------------------------------------------------------------------------------------------'
    print '| Request                                                                                             | Closed-out? | Current status          |'
    print '-----------------------------------------------------------------------------------------------------------------------------------------------'

    for workflow in closedOut:
        status = getRequestStatus(workflow)
        if status == 'completed' and not options.test:
            setRequestStatus(workflow)
            status = 'closed-out'
        else:
            pass
        print "%100s\tYES\t\t%s" % (workflow, status)

    for workflow in nonClosedOut:
        status = getRequestStatus(workflow)
        print "%100s\tNO\t\t%s" % (workflow, status)

    print '-----------------------------------------------------------------------------------------------------------------------------------------------'

    for workflow in tooManyEventsOrLumis:
        os.system(
            'echo ' + workflow +
            ' | mail -s \"newCloseOutTaskChain.py error 1\" [email protected]'
        )
        print "WARNING (more lumis and/or events --> " + workflow
    f.close
    sys.exit(0)
def main():
    parser = optparse.OptionParser()
    parser.add_option('--test',action="store_true", help='Nothing is closed out. Only test if the workflows are ready to be closed out.',dest='test')
    parser.add_option('--verbose',action="store_true", help='Print out details about the number of events expected and produced.',dest='verbose')
    parser.add_option('--correct_env',action="store_true",dest='correct_env')
    (options,args) = parser.parse_args()

    command=""
    for arg in sys.argv:
        command=command+arg+" "

    if not options.correct_env:
        os.system("source /cvmfs/grid.cern.ch/emi-ui-3.7.3-1_sl6v2/etc/profile.d/setup-emi3-ui-example.sh; export X509_USER_PROXY=/tmp/x509up_u13536; source /tmp/relval/sw/comp.pre/slc6_amd64_gcc481/cms/dbs3-client/3.2.8a/etc/profile.d/init.sh; python2.6 "+command + "--correct_env")
        sys.exit(0)
            

    if len(args) != 1:
        print "Usage:"
        print "python closeOutTaskChainWorkflows.py [--test] [--verbose] <inputFile_containing_a_list_of_workflows>"
        sys.exit(0)

    inputFile=args[0]
    f = open(inputFile, 'r')

    closedOut = []
    nonClosedOut = []
    tooManyEventsOrLumis = []
    running = []

    dbsApi = getRelValDsetNames.getDBSApi()

    for line in f:
        workflow = line.rstrip('\n')
        if options.verbose:
            print "checking workflow " + workflow
        schema = getRequestJson(workflow)
        if schema['RequestType'] != 'TaskChain':
            print "workflow type is not TaskChain, exiting"
            sys.exit(0)

        outputDatasets = getRelValDsetNames.getOutputDset(workflow)
#        pprint.pprint(outputDatasets)
        # We should never hit this case
        if 'RequestNumEvents' in schema['Task1'] and 'InputDataset' in schema['Task1']:
            print "Request cannot have both RequestNumEvents and InputDataset in Task1, exiting..."
            sys.exit(1)

        # Check whether it's FastSim or FullSim from scratch
        if 'RequestNumEvents' in schema['Task1']:
            inputEvents = schema['Task1']['RequestNumEvents']
#            if options.verbose:
#                print "DEBUG: RequestNumEvents: %d" % inputEvents

            closeOut = True
            tooMany = False
            for dataset in outputDatasets:
                outputEvents = 0

                outputEvents = getOutputEvents(dbsApi, dataset, verb = options.verbose)
                if options.verbose:
                    successRate = outputEvents/float(inputEvents)
                    print "  %-110s\t%d\t%.1f%%" % (dataset, outputEvents, successRate*100)

                if outputEvents == -10:
                    continue
                elif outputEvents == inputEvents:
                    pass
                elif outputEvents < inputEvents :
                    closeOut = False
                elif outputEvents > inputEvents :
                    closeOut = False
                    tooMany = True

            if closeOut:
                closedOut.append(workflow)
            else:
                nonClosedOut.append(workflow)
            if tooMany:
                tooManyEventsOrLumis.append(workflow)

        # Then it's either Data or MC recycling
        elif 'InputDataset' in schema['Task1']:
            inputDset = schema['Task1']['InputDataset']

            # It's Data
            if 'RunWhitelist' in schema['Task1']:
                closeOut = True
                tooMany = False

                runList = schema['Task1']['RunWhitelist']
#                if options.verbose:
#                    print "DEBUG: InputDset %s and runList is %r" % (inputDset, runList)

                inputEvents = getEventsDataSetRunList(dbsApi, inputDset, runList, verb = options.verbose)
#                if options.verbose:
#                    print "DEBUG: InputDset %s and %d events" % (inputDset, inputEvents)

                for dataset in outputDatasets:
                    outputEvents = 0

                    outputEvents = getOutputEvents(dbsApi, dataset, verb = options.verbose)
                    if options.verbose:
                        successRate = outputEvents/float(inputEvents)
                        print "  %-110s\t%d\t%.1f%%" % (dataset, outputEvents, successRate*100)

                    if outputEvents == -10:
                        continue
                    elif outputEvents == inputEvents:
                        pass
                    elif outputEvents < inputEvents :
                        closeOut = False
                    elif outputEvents > inputEvents :
                        #print outputEvents
                        #print inputEvents
                        closeOut = False
                        tooMany = True

                if closeOut:
                    closedOut.append(workflow)
                else:
                    nonClosedOut.append(workflow)
                if tooMany:
                    tooManyEventsOrLumis.append(workflow)

            elif 'BlockWhitelist' in schema['Task1']:
                print "TODO: you need to code me to handle block white list"
                nonClosedOut.append(workflow)
                break

            # Most likely MC, since there is no run whitelist
            # it means we can just go for num of events
            else:
                closeOut = True
                tooMany = False

                inputEvents = getRelValDsetNames.getNumEvents(dbsApi, inputDset)
#                if options.verbose:
#                    print "DEBUG: InputDset %s and %d events" % (inputDset, inputEvents)

                for dataset in outputDatasets:
                    outputEvents = 0

                    outputEvents = getOutputEvents(dbsApi, dataset, verb = options.verbose)
                    if options.verbose:
                        successRate = outputEvents/float(inputEvents)
                        print "  %-110s\t%d\t%.1f%%" % (dataset, outputEvents, successRate*100)
                
                    if outputEvents == -10:
                        continue
                    elif outputEvents == inputEvents:
                        pass
                    elif outputEvents < inputEvents :
                        closeOut = False
                    elif outputEvents > inputEvents :
                        closeOut = False
                        tooMany = True

                if closeOut:
                    closedOut.append(workflow)
                else:
                    nonClosedOut.append(workflow)
                if tooMany:
                    tooManyEventsOrLumis.append(workflow)

    print '-----------------------------------------------------------------------------------------------------------------------------------------------'
    print '| Request                                                                                             | Closed-out? | Current status          |'
    print '-----------------------------------------------------------------------------------------------------------------------------------------------'

    for workflow in closedOut:
        status = getRequestStatus(workflow)
        if status == 'completed' and not options.test:
            setRequestStatus(workflow)
            status = 'closed-out'
        else:
            pass
        print "%100s\tYES\t\t%s" % (workflow, status)

    for workflow in nonClosedOut:
        status = getRequestStatus(workflow)
        print "%100s\tNO\t\t%s" % (workflow, status)

    print '-----------------------------------------------------------------------------------------------------------------------------------------------'

    for workflow in tooManyEventsOrLumis:
        os.system('echo '+workflow+' | mail -s \"newCloseOutTaskChain.py error 1\" [email protected]')
        print "WARNING (more lumis and/or events --> " + workflow
    f.close
    sys.exit(0)