def prepareDeletionRequests(datasets): """ make a single deletion request per bunch of datasets Filtering only the INVALID or DEPRECATED ones """ size = 30 #delete duplicates datasets = list(set(datasets)) #group datasets by sites requests = {} for ds in datasets: try: t = dbs.getDatasetStatus(ds) #filter by status if t != 'INVALID' and t != 'DEPRECATED': continue sites = phd.getBlockReplicaSites(ds, onlycomplete=False) for s in sites: #ignore buffers if "Buffer" in s or "Export" in s: continue if s not in requests: requests[s] = [] requests[s].append(ds) except Exception as e: print "error:",ds,e return requests
def printDsLocation(ds, clean=False, anyb=False): """ Only printing """ onlycomplete = not anyb sites = sorted(phedexClient.getBlockReplicaSites(ds, onlycomplete)) print ds if onlycomplete: print "block replicas (only complete):" else: print "All block replicas" print ",".join(sites) # print subscriptions only when asked for full block if onlycomplete: sites = sorted(phedexClient.getSubscriptionSites(ds)) print "subscriptions:" print ",".join(sites) # print in the clean ready-to-use format if clean: sites2 = [] for s in sites: if "_MSS" in s or "_Export" in s or "_Buffer" in s: continue s = s.replace("_Disk", "") sites2.append(s) print ",".join(sites2) # and the size size = dbsClient.getDatasetSize(ds) print formatSize(size)
def prepareDeletionRequests(datasets): """ make a single deletion request per bunch of datasets Filtering only the INVALID or DEPRECATED ones """ size = 30 #delete duplicates datasets = list(set(datasets)) #group datasets by sites requests = {} for ds in datasets: try: t = dbs.getDatasetStatus(ds) #filter by status if t != 'INVALID' and t != 'DEPRECATED': continue sites = phd.getBlockReplicaSites(ds, onlycomplete=False) for s in sites: #ignore buffers if "Buffer" in s or "Export" in s: continue if s not in requests: requests[s] = [] requests[s].append(ds) except Exception as e: print "error:", ds, e return requests
def printDsLocation(ds, clean=False, anyb=False): """ Only printing """ onlycomplete = not anyb sites = sorted(phedexClient.getBlockReplicaSites(ds, onlycomplete)) print ds if onlycomplete: print "block replicas (only complete):" else: print "All block replicas" print ','.join(sites) # print subscriptions only when asked for full block if onlycomplete: sites = sorted(phedexClient.getSubscriptionSites(ds)) print "subscriptions:" print ','.join(sites) # print in the clean ready-to-use format if clean: sites2 = [] for s in sites: if '_MSS' in s or '_Export' in s or '_Buffer' in s: continue s = s.replace('_Disk', '') sites2.append(s) print ','.join(sites2) # and the size size = dbsClient.getDatasetSize(ds) print formatSize(size)
def getSiteWithMostInput(dataset, threshold):
    """
    Pick a hosting site for the dataset's input.

    Scans the block replica sites, skipping tape/transfer/EC2/CERN
    endpoints; non-AODSIM datasets are only accepted at T1 sites.
    Returns [site, completion] for the first site that is 100% complete
    or above the threshold, otherwise ['None', 0].
    """
    for candidate in phedexClient.getBlockReplicaSites(dataset):
        # skip tape, export, buffer, EC2 and CERN endpoints
        if ('MSS' in candidate or 'Export' in candidate or 'Buffer' in candidate
                or 'EC2' in candidate or 'CERN' in candidate):
            continue
        # non-AODSIM data must sit at a T1; AODSIM may be anywhere else
        if 'AODSIM' not in dataset and 'T1' not in candidate:
            continue
        completion = getSizeAtSite(candidate, dataset)
        if completion == 100.0 or completion > threshold:
            return [candidate.replace('_Disk', ''), completion]
    return ['None', 0]
def makeDeletionRequests(url, allDatasets, verbose=False): """ make a single deletion request per bunch of datasets Filtering only the INVALID or DEPRECATED ones """ size = 20 deletionRequests = [] #delete duplicates allDatasets = list(set(allDatasets)) while allDatasets: datasets = allDatasets[:size] allDatasets = allDatasets[size:] #get the sites sites = set() #add all sites for all datasets dsToDelete = set() for ds in datasets: try: t = dbs.getDatasetStatus(ds) if verbose: print ds, 'is', t #filter by status if t == 'INVALID' or t == 'DEPRECATED': dsToDelete.add(ds) sites2 = phd.getBlockReplicaSites(ds, onlycomplete=False) for s in sites2: #ignore buffers if "Buffer" in s or "Export" in s: continue sites.add(s) if verbose: print "available in", sites except Exception as e: print ds,e #create a single request if dsToDelete and sites: print "About to create a deletion request for" print '\n'.join(dsToDelete) print "To this sites:" print '\n'.join(sites) r = phd.makeDeletionRequest(url, list(sites), dsToDelete, "Invalid data, can be deleted") if ("phedex" in r and "request_created" in r["phedex"] and "id" in r["phedex"]["request_created"]): reqid = r["phedex"]["request_created"]["id"] deletionRequests.append(reqid) if verbose: print "Request created:", reqid else: print r return deletionRequests
def makeDeletionRequests(url, allDatasets, verbose=False, test=False): """ make a single deletion request per bunch of datasets Filtering only the INVALID or DEPRECATED ones """ size = 30 #delete duplicates datasets = list(set(allDatasets)) #group datasets by sites requests = {} for ds in datasets: try: t = dbs.getDatasetStatus(ds) if verbose: print ds, 'is', t #filter by status if t != 'INVALID' and t != 'DEPRECATED': continue sites = phd.getBlockReplicaSites(ds, onlycomplete=False) for s in sites: #ignore buffers if "Buffer" in s or "Export" in s: continue if s not in requests: requests[s] = [] requests[s].append(ds) if verbose: print "available in", sites except Exception as e: print ds,e deletionRequests = [] #for each site for s in sorted(requests.keys()): datasets = requests[s] print "site", s print "datasets to delete" print '\n'.join(datasets) if not test: r = phd.makeDeletionRequest(url, [s], datasets, "Invalid data, can be deleted") if ("phedex" in r and "request_created" in r["phedex"]): reqid = r["phedex"]["request_created"][0]["id"] deletionRequests.append(reqid) if verbose: print "Request created:", reqid else: print r return deletionRequests
def makeDeletionRequests(url, allDatasets, verbose=False, test=False): """ make a single deletion request per bunch of datasets Filtering only the INVALID or DEPRECATED ones """ size = 30 #delete duplicates datasets = list(set(allDatasets)) #group datasets by sites requests = {} for ds in datasets: try: t = dbs.getDatasetStatus(ds) if verbose: print ds, 'is', t #filter by status if t != 'INVALID' and t != 'DEPRECATED': continue sites = phd.getBlockReplicaSites(ds, onlycomplete=False) for s in sites: #ignore buffers if "Buffer" in s or "Export" in s: continue if s not in requests: requests[s] = [] requests[s].append(ds) if verbose: print "available in", sites except Exception as e: print ds, e deletionRequests = [] #for each site for s in sorted(requests.keys()): datasets = requests[s] print "site", s print "datasets to delete" print '\n'.join(datasets) if not test: r = phd.makeDeletionRequest(url, [s], datasets, "Invalid data, can be deleted") if ("phedex" in r and "request_created" in r["phedex"]): reqid = r["phedex"]["request_created"][0]["id"] deletionRequests.append(reqid) if verbose: print "Request created:", reqid else: print r return deletionRequests