def getTUsForDeletion(self, dconfig):
    """
    Get list of candidate transfer units for deletion, in order, for a
    data stream (argument is a DatasetConfig object).

    Looks in: arrivals directory (if there is one), data_stream directory
    and quarantine directory.

    First sort key is that it does arrivals dir before anything else, as
    this reduces checksumming.  Apart from that, it does most recently
    created files first, as these are likely to be the easiest to find
    another copy of.

    (NB uses change time, as this will more accurately reflect when it
    was really created on THIS system, whereas mtime can be set by rsync
    to match the modification time on another system.)

    Returns a list of paths; entries that no longer exist at the time of
    the final check are dropped.
    """
    ds_dir = dconfig["data_stream"]["directory"]
    q_dir = dconfig["outgoing"]["quarantine_dir"]
    arr_dir = dconfig["incoming"]["directory"]

    def list_dir(dir_path):
        # list() guarantees something we can concatenate with "+",
        # whatever sequence type futils.listDir hands back.
        return list(futils.listDir(dir_path, fullPaths=True,
                                   emptyListOnException=True))

    def ctime_key(path):
        # NOTE(review): presumably getCtimeOrNone returns None when the
        # ctime cannot be read -- confirm against futils.  A bare None key
        # cannot be ordered against numbers on Python 3, so pair it with a
        # flag; under reverse=True entries without a ctime then sort last,
        # which is also where Python 2 placed a None key.
        ctime = futils.getCtimeOrNone(path)
        return (ctime is not None, ctime)

    def newest_first(paths):
        return sorted(paths, key=ctime_key, reverse=True)

    # add items in dataset dir
    transfer_units = list_dir(ds_dir)

    if q_dir:
        # if there is a quarantine directory, add items in the quarantine
        # directory, but first exclude the quarantine dir itself, which
        # may be an entry under the dataset dir
        # (a list comprehension rather than filter(): on Python 3 filter()
        # returns an iterator, which supports neither "+" nor sorting
        # in place)
        transfer_units = ([path for path in transfer_units if path != q_dir]
                          + list_dir(q_dir))

    transfer_units = newest_first(transfer_units)

    # add items in arrivals dir at start (if there is one)
    if arr_dir:
        transfer_units = newest_first(list_dir(arr_dir)) + transfer_units

    # Okay we're done, though for good measure check they all really
    # exist (built eagerly so callers always receive a real list)
    return [path for path in transfer_units if os.path.exists(path)]
def listDir(self, *args, **kwargs):
    """
    Pass-through to futils.listDir: all positional and keyword arguments
    are forwarded unchanged and its result is returned as-is.
    """
    listing = futils.listDir(*args, **kwargs)
    return listing