def prepareDeletionRequests(datasets):
    """
    make a single deletion request per bunch of datasets
    Filtering only the INVALID or DEPRECATED ones
    """
    
    size = 30

    #delete duplicates
    datasets = list(set(datasets))
    
    #group datasets by sites
    requests = {}
    for ds in datasets:
        try:
            t = dbs.getDatasetStatus(ds)
            #filter by status
            if t != 'INVALID' and t != 'DEPRECATED':
                continue
            sites = phd.getBlockReplicaSites(ds, onlycomplete=False)
            for s in sites:
                #ignore buffers
                if "Buffer" in s or "Export" in s:
                    continue
                if s not in requests:
                    requests[s] = []
                requests[s].append(ds)
        except Exception as e:
            print "error:",ds,e
    

    return requests
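
#Minimal usage sketch for prepareDeletionRequests (assumptions: the surrounding
#module provides the `dbs` and `phd` client wrappers used above; the dataset
#name below is purely illustrative):
#
#   requests = prepareDeletionRequests(['/HypotheticalPrimary/HypotheticalProc-v1/AODSIM'])
#   for site in sorted(requests):
#       print site, "->", len(requests[site]), "dataset(s)"
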
def printDsLocation(ds, clean=False, anyb=False):
    """
    Only printing
    """
    onlycomplete = not anyb
    sites = sorted(phedexClient.getBlockReplicaSites(ds, onlycomplete))
    print ds
    if onlycomplete:
        print "block replicas (only complete):"
    else:
        print "All block replicas"
    print ",".join(sites)

    # print subscriptions only when asked for full block
    if onlycomplete:
        sites = sorted(phedexClient.getSubscriptionSites(ds))
        print "subscriptions:"
        print ",".join(sites)

    # print in the clean ready-to-use format (note: at this point `sites` holds
    # the subscription sites when onlycomplete, block-replica sites otherwise)
    if clean:
        sites2 = []
        for s in sites:
            if "_MSS" in s or "_Export" in s or "_Buffer" in s:
                continue
            s = s.replace("_Disk", "")
            sites2.append(s)
        print ",".join(sites2)

    # and the size
    size = dbsClient.getDatasetSize(ds)
    print formatSize(size)
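
#Minimal usage sketch for printDsLocation (assumptions: `phedexClient`,
#`dbsClient` and `formatSize` are defined in the surrounding module; the
#dataset name is illustrative):
#
#   printDsLocation('/HypotheticalPrimary/HypotheticalProc-v1/AODSIM', clean=True)
#   printDsLocation('/HypotheticalPrimary/HypotheticalProc-v1/AODSIM', anyb=True)
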
def getSiteWithMostInput(dataset, threshold):
    sites = phedexClient.getBlockReplicaSites(dataset)
    for site in sites:
        #skip tape, staging and special endpoints
        if ('MSS' in site or 'Export' in site or 'Buffer' in site
                or 'EC2' in site or 'CERN' in site):
            continue
        #non-AODSIM datasets are only considered at T1 sites
        if 'AODSIM' not in dataset and 'T1' not in site:
            continue
        #return the first site whose completion exceeds the threshold
        completion = getSizeAtSite(site, dataset)
        if completion == 100.0 or completion > threshold:
            site = site.replace('_Disk', '')
            return [site, completion]
    return ['None', 0]
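
#Minimal usage sketch for getSiteWithMostInput (assumptions: `phedexClient` and
#`getSizeAtSite` come from the surrounding module; dataset name and threshold
#are illustrative):
#
#   site, completion = getSiteWithMostInput('/HypotheticalPrimary/HypotheticalProc-v1/RECO', 95.0)
#   print "candidate site:", site, completion
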
def makeDeletionRequests(url, allDatasets, verbose=False):
    """
    make a single deletion request per bunch of datasets
    Filtering only the INVALID or DEPRECATED ones
    """
    
    size = 20
    deletionRequests = []
    #delete duplicates
    allDatasets = list(set(allDatasets))
    while allDatasets:
        datasets = allDatasets[:size]
        allDatasets = allDatasets[size:]
        
        #get the sites
        sites = set()
        #add all sites for all datasets
        dsToDelete = set()
        for ds in datasets:
            try:
                t = dbs.getDatasetStatus(ds)
                if verbose:
                    print ds, 'is', t
                #filter by status
                if t == 'INVALID' or t == 'DEPRECATED':
                    dsToDelete.add(ds)
                sites2 = phd.getBlockReplicaSites(ds, onlycomplete=False)
                for s in sites2:
                    #ignore buffers
                    if "Buffer" in s or "Export" in s:
                        continue
                    sites.add(s)
                if verbose:
                    print "available in", sites2
            except Exception as e:
                print ds, e

        #create a single request
        if dsToDelete and sites:
            print "About to create a deletion request for"
            print '\n'.join(dsToDelete)
            print "To this sites:"
            print '\n'.join(sites)
            r = phd.makeDeletionRequest(url, list(sites), dsToDelete, "Invalid data, can be deleted")
            if ("phedex" in r and 
                    "request_created" in r["phedex"] and
                    "id" in r["phedex"]["request_created"]):
                reqid = r["phedex"]["request_created"]["id"]
                deletionRequests.append(reqid)
                if verbose:
                    print "Request created:", reqid
            else:
                print r
    return deletionRequests
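
#Minimal usage sketch for the batched makeDeletionRequests above (assumptions:
#the PhEDEx data-service url and the dataset list are placeholders for real values):
#
#   reqids = makeDeletionRequests('some.phedex.url', invalidDatasets, verbose=True)
#   print "created deletion requests:", reqids
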
def makeDeletionRequests(url, allDatasets, verbose=False, test=False):
    """
    make a single deletion request per bunch of datasets
    Filtering only the INVALID or DEPRECATED ones
    """
    
    size = 30

    #delete duplicates
    datasets = list(set(allDatasets))
    
    #group datasets by sites
    requests = {}
    for ds in datasets:
        try:
            t = dbs.getDatasetStatus(ds)
            if verbose:
                print ds, 'is', t
            #filter by status
            if t != 'INVALID' and t != 'DEPRECATED':
                continue
            sites = phd.getBlockReplicaSites(ds, onlycomplete=False)
            for s in sites:
                #ignore buffers
                if "Buffer" in s or "Export" in s:
                    continue
                if s not in requests:
                    requests[s] = []
                requests[s].append(ds)
            if verbose:
                print "available in", sites
        except Exception as e:
            print ds, e

    deletionRequests = []
    #for each site
    for s in sorted(requests.keys()):
        datasets = requests[s]
        print "site", s
        print "datasets to delete"
        print '\n'.join(datasets)
        if not test:
            r = phd.makeDeletionRequest(url, [s], datasets, "Invalid data, can be deleted")
            if ("phedex" in r and 
                    "request_created" in r["phedex"]):
                reqid = r["phedex"]["request_created"][0]["id"]
                deletionRequests.append(reqid)
                if verbose:
                    print "Request created:", reqid
            else:
                print r
    return deletionRequests
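
#Minimal usage sketch for the per-site makeDeletionRequests above (the url and
#dataset list are placeholders; test=True only prints the per-site grouping and
#creates no request, as in the code above):
#
#   makeDeletionRequests('some.phedex.url', invalidDatasets, verbose=True, test=True)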