def testCommit(self):
    """
    Testcase for the commit method of the Fileset class.

    A dummy file is added to a fileset built from self.initialSet. The test
    checks that the file is first staged in the newfiles attribute, and that
    after commit() it is found in the files attribute while newfiles is
    empty.
    """
    localTestFileSet = Fileset('LocalTestFileset', self.initialSet)
    fsSize = len(localTestFileSet.getFiles(type="lfn"))

    # Dummy file to test
    fileTestCommit = File('/tmp/filetestcommit', 0, 1, 1)

    # File is added to the newfiles attribute of localTestFileSet
    localTestFileSet.addFile(fileTestCommit)
    assert fsSize == len(localTestFileSet.getFiles(type="lfn")) - 1, \
        'file not added correctly to test fileset'

    # Snapshot the pending files instead of keeping a reference to the
    # fileset's own set: if commit() empties that set in place, a bare
    # reference would become empty too and the subset check below would
    # pass without testing anything.
    newfilestemp = set(localTestFileSet.newfiles)
    assert fileTestCommit in newfilestemp, \
        'test file not in the new files list'

    # After commit, dummy file is supposed to move from newfiles to files
    localTestFileSet.commit()

    # First, testing if the new file is present at the files attribute of
    # the Fileset object
    assert newfilestemp.issubset(localTestFileSet.files), \
        'Test file not present at fileset.files - fileset.commit ' \
        'not working properly'

    # Second, testing if the newfiles attribute is empty
    assert localTestFileSet.newfiles == set(), \
        'fileset.newfiles not empty after commit ' \
        '- fileset.commit not working properly'
def testCommit(self):
    """
    Testcase for the commit method of the Fileset class.

    A dummy file is added to a fileset built from self.initialSet. The test
    checks that the file is first staged in the newfiles attribute, and that
    after commit() it is found in the files attribute while newfiles is
    empty.
    """
    localTestFileSet = Fileset('LocalTestFileset', self.initialSet)
    fsSize = len(localTestFileSet.getFiles(type="lfn"))

    # Dummy file to test
    fileTestCommit = File('/tmp/filetestcommit', 0, 1, 1)

    # File is added to the newfiles attribute of localTestFileSet
    localTestFileSet.addFile(fileTestCommit)
    assert fsSize == len(localTestFileSet.getFiles(type="lfn")) - 1, \
        'file not added correctly to test fileset'

    # Snapshot the pending files instead of keeping a reference to the
    # fileset's own set: if commit() empties that set in place, a bare
    # reference would become empty too and the subset check below would
    # pass without testing anything.
    newfilestemp = set(localTestFileSet.newfiles)
    assert fileTestCommit in newfilestemp, \
        'test file not in the new files list'

    # After commit, dummy file is supposed to move from newfiles to files
    localTestFileSet.commit()

    # First, testing if the new file is present at the files attribute of
    # the Fileset object
    assert newfilestemp.issubset(localTestFileSet.files), \
        'Test file not present at fileset.files - fileset.commit ' \
        'not working properly'

    # Second, testing if the newfiles attribute is empty
    assert localTestFileSet.newfiles == set(), \
        'fileset.newfiles not empty after commit ' \
        '- fileset.commit not working properly'
def testMetaData(self):
    """
    _testMetaData_

    Build a single-file, FileBased subscription, run the job factory with a
    site white list and a site black list, and check that every job created
    carries the task, workflow name and owner of the workflow.
    """
    workflow = Workflow(spec="spec.pkl", owner="Steve",
                        name="TestWorkflow", task="TestTask")

    fileset = Fileset(name="TestFileset")
    fileset.addFile(File(lfn="someLFN"))
    fileset.commit()

    subscription = Subscription(fileset=fileset,
                                workflow=workflow,
                                split_algo="FileBased")

    factory = JobFactory(subscription=subscription)
    jobGroups = factory(siteWhitelist=["site1"], siteBlacklist=["site2"])

    # (job key, expected value, failure message), checked in this order
    expectations = (
        ("task", "TestTask", "Error: Task is wrong."),
        ("workflow", "TestWorkflow", "Error: Workflow is wrong."),
        ("owner", "Steve", "Error: Owner is wrong."),
    )

    # At least one group, and at least one job per group, must exist
    self.assertTrue(len(jobGroups) > 0)
    for group in jobGroups:
        self.assertTrue(len(group.jobs) > 0)
        for job in group.jobs:
            for key, expected, message in expectations:
                self.assertEqual(job[key], expected, message)

    return
def testCall(self):
    """
    Call the feeder twenty times on a single fileset.

    After each call one file, if any, is popped from the set returned by
    getFiles(type="set") and the fileset is committed. The test makes no
    assertions; it only checks that the sequence runs without raising.
    """
    fileset = Fileset(name="FakeFeederTest")

    for _ in range(1, 21):
        self.feeder([fileset])

        # Named 'files' rather than 'set' so the builtin is not shadowed
        files = fileset.getFiles(type="set")
        if files:
            # The popped file itself is not needed
            files.pop()

        fileset.commit()
def testCall(self):
    """
    Call the feeder twenty times on a single fileset.

    After each call one file, if any, is popped from the set returned by
    getFiles(type="set") and the fileset is committed. The test makes no
    assertions; it only checks that the sequence runs without raising.
    """
    fileset = Fileset(name="FakeFeederTest")

    for _ in range(1, 21):
        self.feeder([fileset])

        # Named 'files' rather than 'set' so the builtin is not shadowed
        files = fileset.getFiles(type="set")
        if files:
            # The popped file itself is not needed
            files.pop()

        fileset.commit()
def testProductionRunNumber(self):
    """
    _testProductionRunNumber_

    Verify that jobs created by production subscriptions have the correct
    run number in their job mask.  Also verify that non-production
    subscriptions don't have modified run numbers.
    """
    workflow = Workflow(spec="spec.pkl", owner="Steve",
                        name="TestWorkflow", task="TestTask")

    fileset = Fileset(name="TestFileset")
    fileset.addFile(File(lfn="someLFN"))
    fileset.commit()

    # (mask key, failure message), checked in this order for every job
    runChecks = (
        ("FirstRun", "Error: First run is wrong."),
        ("LastRun", "Error: Last run is wrong."),
    )

    # Production subscription: both ends of the run range must be 1
    productionSubscription = Subscription(fileset=fileset,
                                          workflow=workflow,
                                          split_algo="FileBased",
                                          type="Production")
    productionGroups = JobFactory(subscription=productionSubscription)()

    self.assertTrue(len(productionGroups) > 0)
    for group in productionGroups:
        self.assertTrue(len(group.jobs) > 0)
        for job in group.jobs:
            for key, message in runChecks:
                self.assertEqual(job["mask"][key], 1, message)

    # Processing subscription: the run range must be left unset
    processingSubscription = Subscription(fileset=fileset,
                                          workflow=workflow,
                                          split_algo="FileBased",
                                          type="Processing")
    processingGroups = JobFactory(subscription=processingSubscription)()

    for group in processingGroups:
        for job in group.jobs:
            for key, message in runChecks:
                self.assertEqual(job["mask"][key], None, message)

    return
def testProductionRunNumber(self):
    """
    _testProductionRunNumber_

    Verify that jobs created by production subscriptions have the correct
    run number in their job mask.  Also verify that non-production
    subscriptions don't have modified run numbers.
    """
    testWorkflow = Workflow(spec="spec.pkl", owner="Steve",
                            name="TestWorkflow", task="TestTask")

    testFileset = Fileset(name="TestFileset")
    testFile = File(lfn="someLFN")
    testFileset.addFile(testFile)
    testFileset.commit()

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    split_algo="FileBased",
                                    type="Production")

    myJobFactory = JobFactory(subscription=testSubscription)
    testJobGroups = myJobFactory()

    # Without these guards the loops below would pass without checking
    # anything when the factory produces no job groups or no jobs.
    assert len(testJobGroups) > 0, \
        "Error: No job groups were created."
    for testJobGroup in testJobGroups:
        assert len(testJobGroup.jobs) > 0, \
            "Error: Job group has no jobs."
        for job in testJobGroup.jobs:
            assert job["mask"]["FirstRun"] == 1, \
                "Error: First run is wrong."
            assert job["mask"]["LastRun"] == 1, \
                "Error: Last run is wrong."

    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    split_algo="FileBased",
                                    type="Processing")

    myJobFactory = JobFactory(subscription=testSubscription)
    testJobGroups = myJobFactory()

    for testJobGroup in testJobGroups:
        for job in testJobGroup.jobs:
            # Identity test: None is a singleton, and '==' could be
            # answered by a custom __eq__
            assert job["mask"]["FirstRun"] is None, \
                "Error: First run is wrong."
            assert job["mask"]["LastRun"] is None, \
                "Error: Last run is wrong."

    return
def testMetaData(self):
    """
    _testMetaData_

    Build a single-file, FileBased subscription, run the job factory with a
    site white list and a site black list, and check that every job created
    carries the task, workflow name, owner and both site lists.
    """
    workflow = Workflow(spec="spec.pkl", owner="Steve",
                        name="TestWorkflow", task="TestTask")

    fileset = Fileset(name="TestFileset")
    fileset.addFile(File(lfn="someLFN"))
    fileset.commit()

    subscription = Subscription(fileset=fileset,
                                workflow=workflow,
                                split_algo="FileBased")

    factory = JobFactory(subscription=subscription)
    jobGroups = factory(siteWhitelist=["site1"], siteBlacklist=["site2"])

    # (job key, expected value, failure message), checked in this order
    expectations = (
        ("task", "TestTask", "Error: Task is wrong."),
        ("workflow", "TestWorkflow", "Error: Workflow is wrong."),
        ("owner", "Steve", "Error: Owner is wrong."),
        ("siteWhitelist", ["site1"], "Error: Site white list is wrong."),
        ("siteBlacklist", ["site2"], "Error: Site black list is wrong."),
    )

    # At least one group, and at least one job per group, must exist
    self.assertTrue(len(jobGroups) > 0)
    for group in jobGroups:
        self.assertTrue(len(group.jobs) > 0)
        for job in group.jobs:
            for key, expected, message in expectations:
                self.assertEqual(job[key], expected, message)

    return
class Subscription(Pickleable, dict):
    """
    Subscription of a workflow to a fileset, with in-memory bookkeeping.

    The subscription is a dict holding the keys 'fileset', 'workflow',
    'type', 'split_algo', 'whitelist' and 'blacklist'.  The state of each
    file is tracked in four Fileset attributes: available, acquired,
    completed and failed.
    """

    def __init__(self, fileset=None, workflow=None, whitelist=None,
                 blacklist=None, split_algo="FileBased", type="Processing"):
        """
        Store the subscription settings and seed the available fileset with
        the files of the given fileset.

        A missing fileset is replaced by a default-constructed Fileset;
        missing white and black lists are replaced by empty sets.
        """
        if fileset is None:
            fileset = Fileset()
        if whitelist is None:
            whitelist = set()
        if blacklist is None:
            blacklist = set()

        self.setdefault('fileset', fileset)
        self.setdefault('workflow', workflow)
        self.setdefault('type', type)
        self.setdefault('split_algo', split_algo)
        self.setdefault('whitelist', whitelist)
        self.setdefault('blacklist', blacklist)

        self.available = Fileset(name=fileset.name,
                                 files=fileset.getFiles())
        self.acquired = Fileset(name='acquired')
        self.completed = Fileset(name='completed')
        self.failed = Fileset(name='failed')

    def name(self):
        """
        Return the workflow name and the fileset name joined by '_', with
        the spaces removed from both.
        """
        return self.getWorkflow().name.replace(' ', '') + '_' + \
            self.getFileset().name.replace(' ', '')

    def getWorkflow(self):
        """
        Return the workflow of the subscription (may be None).
        """
        return self["workflow"]

    def workflowName(self):
        """
        Return the name of the workflow, or "Unknown" without a workflow.
        """
        if self["workflow"] is None:
            return "Unknown"
        return self["workflow"].name

    def taskName(self):
        """
        Return the task of the workflow, or "Unknown" without a workflow.
        """
        if self['workflow'] is None:
            return "Unknown"
        return self['workflow'].task

    def getFileset(self):
        """
        Return the fileset of the subscription.
        """
        return self['fileset']

    def _commitAll(self):
        """
        Commit the four state filesets.
        """
        self.acquired.commit()
        self.available.commit()
        self.failed.commit()
        self.completed.commit()

    def _moveFiles(self, files, target, sources):
        """
        Remove every file from the source filesets and add it to target.
        """
        for item in files:
            # Check each set, instead of stopping at the first hit, just in
            # case something has got out of synch
            for source in sources:
                if item in source.files:
                    source.files.remove(item)
            target.addFile(item)

    def acquireFiles(self, files=None, size=1):
        """
        Mark files as acquired and return the files acquired.

        When files is given and not empty, exactly those files are removed
        from the available, failed and completed filesets and added to the
        acquired one.  Otherwise size files are popped from the available
        fileset; a size of 0, or one larger than what is available, takes
        every available file.

        The return value is the result of self.acquired.listNewFiles().
        """
        self._commitAll()

        # None stands in for "no explicit files" so that no mutable default
        # object is shared between calls
        if files is None:
            files = []

        if len(files):
            self._moveFiles(files, self.acquired,
                            (self.available, self.failed, self.completed))
        else:
            if len(self.available.files) < size or size == 0:
                size = len(self.available.files)
            for _ in range(size):
                self.acquired.addFile(self.available.files.pop())

        return self.acquired.listNewFiles()

    def completeFiles(self, files):
        """
        Mark the given files as completed.

        Each file is removed from the available, failed and acquired
        filesets and added to the completed one.  Nothing is returned.
        """
        self._commitAll()
        self._moveFiles(files, self.completed,
                        (self.available, self.failed, self.acquired))

    def failFiles(self, files):
        """
        Mark the given files as failed.

        Each file is removed from the available, completed and acquired
        filesets and added to the failed one.  Nothing is returned.
        """
        self._commitAll()
        self._moveFiles(files, self.failed,
                        (self.available, self.completed, self.acquired))

    def filesOfStatus(self, status=None):
        """
        _filesOfStatus_

        Return a Set of File objects that are associated with the
        subscription and have a particular status.

        status is matched after str.title(), so case does not matter; it
        must be a string (the None default is not a usable value).  The
        known statuses are Available, Acquired, Completed and Failed; any
        other status returns None.
        """
        status = status.title()
        if status == 'Available':
            # Files in one of the other three states are never available
            return self.available.getFiles(type='set') - \
                (self.acquiredFiles() | self.completedFiles() |
                 self.failedFiles())
        elif status == 'Acquired':
            return self.acquired.getFiles(type='set')
        elif status == 'Completed':
            return self.completed.getFiles(type='set')
        elif status == 'Failed':
            return self.failed.getFiles(type='set')
        return None

    def markLocation(self, location, whitelist=True):
        """
        Add a location to the subscriptions white or black list
        """
        if whitelist:
            self['whitelist'].add(location)
        else:
            self['blacklist'].add(location)

    def availableFiles(self):
        """
        Return a Set of files that are available for processing
        (e.g. not already in use) and at sites that are white listed
        or not black listed.

        A non-empty white list takes precedence: the black list is only
        consulted when the white list is empty.
        """
        def locationMagic(files, locations):
            """
            files and locations are sets. method returns the subset of
            files that are at the locations - this is a lot simpler with
            the database
            """
            return set(f for f in files
                       if len(f['locations'] & locations) > 0)

        files = self.filesOfStatus(status="Available")

        if len(self['whitelist']) > 0:
            # Return files at white listed sites
            return locationMagic(files, self['whitelist'])
        elif len(self['blacklist']) > 0:
            # Return files not at black listed sites
            return files - locationMagic(files, self['blacklist'])
        # Neither list is set: return all files
        return files

    def acquiredFiles(self):
        """
        Set of files marked as acquired.
        """
        return self.filesOfStatus(status="Acquired")

    def completedFiles(self):
        """
        Set of files marked as completed.
        """
        return self.filesOfStatus(status="Completed")

    def failedFiles(self):
        """
        Set of files marked as failed.
        """
        return self.filesOfStatus(status="Failed")
class Subscription(Pickleable, dict):
    """
    Subscription of a workflow to a fileset, with in-memory bookkeeping.

    The subscription is a dict holding the keys 'fileset', 'workflow',
    'type' and 'split_algo'.  The state of each file is tracked in four
    Fileset attributes: available, acquired, completed and failed.
    """

    def __init__(self, fileset=None, workflow=None,
                 split_algo="FileBased", type="Processing"):
        """
        Store the subscription settings and seed the available fileset with
        the files of the given fileset.

        A missing fileset is replaced by a default-constructed Fileset.
        """
        if fileset is None:
            fileset = Fileset()

        self.setdefault('fileset', fileset)
        self.setdefault('workflow', workflow)
        self.setdefault('type', type)
        self.setdefault('split_algo', split_algo)

        self.available = Fileset(name=fileset.name,
                                 files=fileset.getFiles())
        self.acquired = Fileset(name='acquired')
        self.completed = Fileset(name='completed')
        self.failed = Fileset(name='failed')

    def name(self):
        """
        Return the workflow name and the fileset name joined by '_', with
        the spaces removed from both.
        """
        return self.getWorkflow().name.replace(' ', '') + '_' + \
            self.getFileset().name.replace(' ', '')

    def getWorkflow(self):
        """
        Return the workflow of the subscription (may be None).
        """
        return self["workflow"]

    def workflowName(self):
        """
        Return the name of the workflow, or "Unknown" without a workflow.
        """
        if self["workflow"] is None:
            return "Unknown"
        return self["workflow"].name

    def workflowType(self):
        """
        Return the wfType of the workflow, or "Unknown" without a workflow.
        """
        if self["workflow"] is None:
            return "Unknown"
        return self["workflow"].wfType

    def taskName(self):
        """
        Return the task of the workflow, or "Unknown" without a workflow.
        """
        if self['workflow'] is None:
            return "Unknown"
        return self['workflow'].task

    def owner(self):
        """
        Return the owner of the workflow, or 'Unknown' without a workflow.
        """
        if self['workflow'] is None:
            return 'Unknown'
        return self['workflow'].owner

    def getFileset(self):
        """
        Return the fileset of the subscription.
        """
        return self['fileset']

    def _commitAll(self):
        """
        Commit the four state filesets.
        """
        self.acquired.commit()
        self.available.commit()
        self.failed.commit()
        self.completed.commit()

    def _moveFiles(self, files, target, sources):
        """
        Remove every file from the source filesets and add it to target.
        """
        for item in files:
            # Check each set, instead of stopping at the first hit, just in
            # case something has got out of synch
            for source in sources:
                if item in source.files:
                    source.files.remove(item)
            target.addFile(item)

    def acquireFiles(self, files=None, size=1):
        """
        Mark files as acquired and return the files acquired.

        When files is given and not empty, exactly those files are removed
        from the available, failed and completed filesets and added to the
        acquired one.  Otherwise size files are popped from the available
        fileset; a size of 0, or one larger than what is available, takes
        every available file.

        The return value is the result of self.acquired.listNewFiles().
        """
        self._commitAll()

        # None stands in for "no explicit files" so that no mutable default
        # object is shared between calls
        if files is None:
            files = []

        if len(files):
            self._moveFiles(files, self.acquired,
                            (self.available, self.failed, self.completed))
        else:
            if len(self.available.files) < size or size == 0:
                size = len(self.available.files)
            for _ in range(size):
                self.acquired.addFile(self.available.files.pop())

        return self.acquired.listNewFiles()

    def completeFiles(self, files):
        """
        Mark the given files as completed.

        Each file is removed from the available, failed and acquired
        filesets and added to the completed one.  Nothing is returned.
        """
        self._commitAll()
        self._moveFiles(files, self.completed,
                        (self.available, self.failed, self.acquired))

    def failFiles(self, files):
        """
        Mark the given files as failed.

        Each file is removed from the available, completed and acquired
        filesets and added to the failed one.  Nothing is returned.
        """
        self._commitAll()
        self._moveFiles(files, self.failed,
                        (self.available, self.completed, self.acquired))

    def filesOfStatus(self, status=None, doingJobSplitting=False):
        """
        _filesOfStatus_

        Return a Set of File objects that are associated with the
        subscription and have a particular status.

        status is matched after str.title(), so case does not matter; it
        must be a string (the None default is not a usable value).  The
        known statuses are Available, Acquired, Completed and Failed; any
        other status returns None.

        doingJobSplitting is accepted but not used by this implementation.
        """
        status = status.title()
        if status == 'Available':
            # Files in one of the other three states are never available
            return self.available.getFiles(type='set') - \
                (self.acquiredFiles() | self.completedFiles() |
                 self.failedFiles())
        elif status == 'Acquired':
            return self.acquired.getFiles(type='set')
        elif status == 'Completed':
            return self.completed.getFiles(type='set')
        elif status == 'Failed':
            return self.failed.getFiles(type='set')
        return None

    def availableFiles(self, limit=None, doingJobSplitting=False):
        """
        _availableFiles_

        Return the files that are available for processing (e.g. not
        already in use).

        Without a limit the result is a set.  With a truthy limit the
        result is a list holding at most limit of those files; which files
        are kept is not specified, since they are sliced from a set.
        """
        available = self.filesOfStatus(status="Available",
                                       doingJobSplitting=doingJobSplitting)
        if limit:
            return list(available)[:limit]
        return available

    def acquiredFiles(self):
        """
        Set of files marked as acquired.
        """
        return self.filesOfStatus(status="Acquired")

    def completedFiles(self):
        """
        Set of files marked as completed.
        """
        return self.filesOfStatus(status="Completed")

    def failedFiles(self):
        """
        Set of files marked as failed.
        """
        return self.filesOfStatus(status="Failed")
class JobGroup(WMObject):
    """
    JobGroups are sets of jobs running on files whose output needs to be
    merged together.

    Jobs are first collected in the newjobs list and moved to the jobs list
    by commit().  Output files are kept in the output Fileset.
    """

    def __init__(self, subscription=None, jobs=None):
        """
        Create the group, optionally with initial uncommitted jobs.

        jobs may be a list of jobs or a single job; a single job is wrapped
        in a list.
        """
        self.jobs = []
        self.newjobs = []
        self.id = 0

        # isinstance also accepts list subclasses, which an exact type
        # comparison would wrongly wrap in another list.
        # NOTE: a list argument is stored by reference, not copied.
        if isinstance(jobs, list):
            self.newjobs = jobs
        elif jobs is not None:
            self.newjobs = [jobs]

        self.subscription = subscription
        self.output = Fileset()
        self.last_update = datetime.datetime.now()

    def add(self, job):
        """
        _add_

        Add a Job or list of jobs to the JobGroup.  The jobs stay in the
        newjobs list until commit() is called.
        """
        jobList = self.makelist(job)
        self.newjobs.extend(jobList)
        return

    def commit(self):
        """
        _commit_

        Move any jobs in the newjobs list to the jobs list.  Empty the
        newjobs list.
        """
        self.jobs.extend(self.newjobs)
        self.newjobs = []

    def commitBulk(self):
        """
        Dummy method for consistency with WMBS implementation
        """
        self.commit()

    def addOutput(self, file):
        """
        _addOutput_

        Add a File to the JobGroup's output fileset.  The File is committed
        to the Fileset immediately.
        """
        self.output.addFile(file)
        self.output.commit()

    def getJobs(self, type="list"):
        """
        _getJobs_

        Retrieve all of the committed jobs in the JobGroup.  The output will
        either be returned as a list of Job objects (when type is "list") or
        a list of Job IDs (when type is "id").  For any other type a message
        is printed and None is returned.
        """
        if type == "list":
            return self.jobs
        elif type == "id":
            return [job["id"] for job in self.jobs]
        else:
            # Function-call form: valid on Python 3 and, for a single
            # argument, prints the same text on Python 2
            print("Unknown type: %s" % type)
            return

    def getOutput(self, type="list"):
        """
        _getOutput_

        Retrieve all of the files that are in the JobGroup's output fileset.
        Type can be one of the following: list, set, lfn, id.
        """
        return self.output.getFiles(type=type)

    def getLength(self, obj):
        """
        This just gets a length for either dict or list objects; anything
        else counts as 0.
        """
        if isinstance(obj, (dict, list)):
            return len(obj)
        return 0

    def __len__(self):
        """
        Allows use of len() on JobGroup.  Both committed and uncommitted
        jobs are counted.
        """
        return self.getLength(self.jobs) + self.getLength(self.newjobs)
class JobGroup(WMObject):
    """
    A JobGroup collects the jobs running on files whose output has to be
    merged together.

    New jobs wait in the newjobs list until commit() appends them to the
    jobs list.  Output files live in the output Fileset.
    """

    def __init__(self, subscription=None, jobs=None):
        """
        Set up an empty group, optionally seeded with uncommitted jobs.
        A list is taken as it is, any other non-None value is wrapped in a
        one-element list.
        """
        if isinstance(jobs, list):
            pending = jobs
        elif jobs is not None:
            pending = [jobs]
        else:
            pending = []

        self.jobs = []
        self.newjobs = pending
        self.id = 0
        self.subscription = subscription
        self.output = Fileset()
        self.last_update = datetime.datetime.now()

    def add(self, job):
        """
        _add_

        Queue a Job, or a list of Jobs, as uncommitted members of the group.
        """
        self.newjobs.extend(self.makelist(job))
        return

    def commit(self):
        """
        _commit_

        Append the uncommitted jobs to the jobs list and start a fresh,
        empty newjobs list.
        """
        self.jobs.extend(self.newjobs)
        self.newjobs = []

    def commitBulk(self):
        """
        Same as commit(); kept for consistency with the WMBS implementation.
        """
        self.commit()

    def addOutput(self, file):
        """
        _addOutput_

        Add a File to the output fileset and commit the fileset right away.
        """
        outputFileset = self.output
        outputFileset.addFile(file)
        outputFileset.commit()

    def getJobs(self, type="list"):
        """
        _getJobs_

        Return the committed jobs, either as the list of Job objects (type
        "list") or as a list of their IDs (type "id").  Any other type
        prints a message and returns None.
        """
        if type == "list":
            return self.jobs
        if type == "id":
            return [job["id"] for job in self.jobs]

        print("Unknown type: %s" % type)
        return

    def getOutput(self, type="list"):
        """
        _getOutput_

        Return the files of the output fileset in the requested form.
        Type can be one of the following: list, set, lfn, id.
        """
        return self.output.getFiles(type=type)

    def getLength(self, obj):
        """
        Length of a dict or a list; every other object counts as 0.
        """
        return len(obj) if isinstance(obj, (dict, list)) else 0

    def __len__(self):
        """
        Number of committed plus uncommitted jobs, so len() works on a
        JobGroup.
        """
        return sum(self.getLength(jobList)
                   for jobList in (self.jobs, self.newjobs))
class Subscription(Pickleable, dict):
    """
    Subscription of a workflow to a fileset, with in-memory bookkeeping.

    The subscription is a dict holding the keys 'fileset', 'workflow',
    'type' and 'split_algo'.  The state of each file is tracked in four
    Fileset attributes: available, acquired, completed and failed.
    """

    def __init__(self, fileset=None, workflow=None,
                 split_algo="FileBased", type="Processing"):
        """
        Store the subscription settings and seed the available fileset with
        the files of the given fileset.

        A missing fileset is replaced by a default-constructed Fileset.
        """
        if fileset is None:
            fileset = Fileset()

        self.setdefault('fileset', fileset)
        self.setdefault('workflow', workflow)
        self.setdefault('type', type)
        self.setdefault('split_algo', split_algo)

        self.available = Fileset(name=fileset.name,
                                 files=fileset.getFiles())
        self.acquired = Fileset(name='acquired')
        self.completed = Fileset(name='completed')
        self.failed = Fileset(name='failed')

    def name(self):
        """
        Return the workflow name and the fileset name joined by '_', with
        the spaces removed from both.
        """
        return self.getWorkflow().name.replace(' ', '') + '_' + \
            self.getFileset().name.replace(' ', '')

    def getWorkflow(self):
        """
        Return the workflow of the subscription (may be None).
        """
        return self["workflow"]

    def workflowName(self):
        """
        Return the name of the workflow, or "Unknown" without a workflow.
        """
        if self["workflow"] is None:
            return "Unknown"
        return self["workflow"].name

    def workflowType(self):
        """
        Return the wfType of the workflow, or "Unknown" without a workflow.
        """
        if self["workflow"] is None:
            return "Unknown"
        return self["workflow"].wfType

    def taskName(self):
        """
        Return the task of the workflow, or "Unknown" without a workflow.
        """
        if self['workflow'] is None:
            return "Unknown"
        return self['workflow'].task

    def owner(self):
        """
        Return the owner of the workflow, or 'Unknown' without a workflow.
        """
        if self['workflow'] is None:
            return 'Unknown'
        return self['workflow'].owner

    def getFileset(self):
        """
        Return the fileset of the subscription.
        """
        return self['fileset']

    def _commitAll(self):
        """
        Commit the four state filesets.
        """
        self.acquired.commit()
        self.available.commit()
        self.failed.commit()
        self.completed.commit()

    def _moveFiles(self, files, target, sources):
        """
        Remove every file from the source filesets and add it to target.
        """
        for item in files:
            # Check each set, instead of stopping at the first hit, just in
            # case something has got out of synch
            for source in sources:
                if item in source.files:
                    source.files.remove(item)
            target.addFile(item)

    def acquireFiles(self, files=None, size=1):
        """
        Mark files as acquired and return the files acquired.

        When files is given and not empty, exactly those files are removed
        from the available, failed and completed filesets and added to the
        acquired one.  Otherwise size files are popped from the available
        fileset; a size of 0, or one larger than what is available, takes
        every available file.

        The return value is the result of self.acquired.listNewFiles().
        """
        self._commitAll()

        # None stands in for "no explicit files" so that no mutable default
        # object is shared between calls
        if files is None:
            files = []

        if len(files):
            self._moveFiles(files, self.acquired,
                            (self.available, self.failed, self.completed))
        else:
            if len(self.available.files) < size or size == 0:
                size = len(self.available.files)
            for _ in range(size):
                self.acquired.addFile(self.available.files.pop())

        return self.acquired.listNewFiles()

    def completeFiles(self, files):
        """
        Mark the given files as completed.

        Each file is removed from the available, failed and acquired
        filesets and added to the completed one.  Nothing is returned.
        """
        self._commitAll()
        self._moveFiles(files, self.completed,
                        (self.available, self.failed, self.acquired))

    def failFiles(self, files):
        """
        Mark the given files as failed.

        Each file is removed from the available, completed and acquired
        filesets and added to the failed one.  Nothing is returned.
        """
        self._commitAll()
        self._moveFiles(files, self.failed,
                        (self.available, self.completed, self.acquired))

    def filesOfStatus(self, status=None, doingJobSplitting=False):
        """
        _filesOfStatus_

        Return a Set of File objects that are associated with the
        subscription and have a particular status.

        status is matched after str.title(), so case does not matter; it
        must be a string (the None default is not a usable value).  The
        known statuses are Available, Acquired, Completed and Failed; any
        other status returns None.

        doingJobSplitting is accepted but not used by this implementation.
        """
        status = status.title()
        if status == 'Available':
            # Files in one of the other three states are never available
            return self.available.getFiles(type='set') - \
                (self.acquiredFiles() | self.completedFiles() |
                 self.failedFiles())
        elif status == 'Acquired':
            return self.acquired.getFiles(type='set')
        elif status == 'Completed':
            return self.completed.getFiles(type='set')
        elif status == 'Failed':
            return self.failed.getFiles(type='set')
        return None

    def availableFiles(self, limit=None, doingJobSplitting=False):
        """
        _availableFiles_

        Return the files that are available for processing (e.g. not
        already in use).

        Without a limit the result is a set.  With a truthy limit the
        result is a list holding at most limit of those files; which files
        are kept is not specified, since they are sliced from a set.
        """
        available = self.filesOfStatus(status="Available",
                                       doingJobSplitting=doingJobSplitting)
        if limit:
            return list(available)[:limit]
        return available

    def acquiredFiles(self):
        """
        Set of files marked as acquired.
        """
        return self.filesOfStatus(status="Acquired")

    def completedFiles(self):
        """
        Set of files marked as completed.
        """
        return self.filesOfStatus(status="Completed")

    def failedFiles(self):
        """
        Set of files marked as failed.
        """
        return self.filesOfStatus(status="Failed")