def main():
    #dataset = sys.argv[1]
    lines = [l.strip() for l in open(sys.argv[1])]
    #look for datasetname
    for i in range(3):
        if lines[i].startswith('dataset'):
            dataset = lines[i].replace('dataset : ','').strip()
            break
    print "'%s'"%dataset
    #build graph and calculate
    graph = buildGraph(lines)
    try:
        files = colorBipartiteGraph(graph)
    except Exception as e:
        files = deleteMaxDegreeFirst(graph)
    total = dbs.getEventCountDataSet(dataset)
    invalid = dbs.getEventCountDataSetFileList(dataset, files)

    print 'total events %s'%total
    print 'invalidated files %s'%len(files)
    print 'invalidated events %s'%invalid
    if total:
        print '%s%%'%(float(total-invalid)/total*100.0)
    for f in sorted(files):
        print f
# Example #2 (scrape separator)
# 0
def main():
    #dataset = sys.argv[1]
    lines = [l.strip() for l in open(sys.argv[1])]
    #look for datasetname
    for i in range(3):
        if lines[i].startswith('dataset'):
            dataset = lines[i].replace('dataset : ','').strip()
            break
    print "'%s'"%dataset
    print "Building graph model"
    graph = buildGraph(lines)

    print "Getting events per file"
    events = getFileEvents(dataset, graph.keys())
    try:
        #first algorithm that assumes bipartition        
        files = colorBipartiteGraph(graph, events)
    except Exception as e:
        #second, algorithm
        #files = deleteMaxDegreeFirst(graph, events)
        files = deleteSmallestVertexFirst(graph, events)
    
    total = dbs.getEventCountDataSet(dataset)
    invalid = dbs.getEventCountDataSetFileList(dataset, files)

    print 'total events %s'%total
    print 'invalidated files %s'%len(files)
    print 'invalidated events %s'%invalid
    if total:
        print '%s%%'%(float(total-invalid)/total*100.0)
    for f in sorted(files):
        print f
# Example #3 (scrape separator)
# 0
def getFileEvents(dataset, files):
    """
    Builds a dict files-> num events
    """
    # One DBS query per file; map each file name to its event count.
    return dict((fname, dbs.getEventCountDataSetFileList(dataset, [fname]))
                for fname in files)
def main():
    dataset = sys.argv[1]
    lines = [l.strip() for l in open(sys.argv[2])]
    graph = buildGraph(lines)
    files = deleteMaxDegreeFirst(graph)
    total = dbs.getEventCountDataSet(dataset)
    invalid = dbs.getEventCountDataSetFileList(dataset, files)
    print 'total events %s'%total
    print 'invalidated files %s'%len(files)
    print 'invalidated events %s'%invalid
    print '%s%%'%(float(total-invalid)/total*100.0)
    for f in files:
        print f
# Example #5 (scrape separator)
# 0
def main():
    
    usage = "python %prog [OPTIONS]"
    parser = OptionParser(usage)
    parser.add_option("-a", "--doall",dest="doall", action="store_true" , default=False, 
                      help="It will analyze all datasets of the workflow from the beginning. If this option is true,"\
                        " you should provide a workflow name or a list of them in the --file option.")
    parser.add_option("-f", "--file",dest="file", 
                      help="Input file with the contents of duplicateEvents.py (a list of lumis and files)."\
                      " If you are using the --doall option, it should contain a list of workflows instead")
    
    options, args = parser.parse_args()
    workflows = None
    #if we not doing all, input should be treated as list of lumis an files
    if not options.doall and options.file:
        lines = [l.strip() for l in open(options.file)]
        graphs = buildGraphs(lines)
    # if do all and input file
    elif options.doall and options.file:
        workflows = [l.strip() for l in open(options.file)]
    elif options.doall and not options.file:
        workflows = args
    else:
        parser.error("You should provide an input file with the output of duplicateEvents")

    # get the output datasets of the workflos and create the graph
    if workflows:
        datasets = []
        for wf in workflows:
            datasets += reqMgrClient.outputdatasetsWorkflow(url, wf);
        
        graphs = {}
        #analyze each dataset
        for dataset in datasets:
            dup, lumis = dbs.duplicateRunLumi(dataset, verbose="dict", skipInvalid=True)
            #print lumis
            graphs[dataset] = buildGraph(lumis)
            
    
    for dataset, graph in graphs.items():
        #look for datasetname
        print "Getting events per file"
        events = getFileEvents(dataset, graph.keys())
        try:
            #first algorithm that assumes bipartition        
            files = colorBipartiteGraph(graph, events)
        except Exception as e:
            #second, algorithm
            #files = deleteMaxDegreeFirst(graph, events)
            files = deleteSmallestVertexFirst(graph, events)
        
        total = dbs.getEventCountDataSet(dataset)
        invalid = dbs.getEventCountDataSetFileList(dataset, files)
    
        print 'total events %s'%total
        print 'invalidated files %s'%len(files)
        print 'invalidated events %s'%invalid
        if total:
            print '%s%%'%(float(total-invalid)/total*100.0)
        for f in sorted(files):
            print f