def runStorage(project, name='Storage'):
    """
    Start the Storage system stream allocator and run its control object.

    The allocator is created with the (BlockSlotPolicy, AllSlotPolicy) pair,
    gets the module level _startupInfo, _optsFile and _fifoName objects
    attached to its fsmManip member and is set up with 25 receiving and
    25 streaming slots per node. No job options writer is attached.

    Arguments:
    @param project  Not referenced anywhere in the function body
    @param name     System name

    @return Tuple (result of Control.run(), controls manager)

    @author M.Frank
    """
    import Online.RunInfoClasses.RecStorage as RI
    import Online.AllocatorControl as Control
    import Online.Streaming.Allocator as StreamAllocator

    mgr = _mgr(FarmSetup.storage_system)
    info = RI.StorageInfoCreator()
    print('Starting storage')

    slot_policies = (StreamAllocator.BlockSlotPolicy,
                     StreamAllocator.AllSlotPolicy)
    streamer = StreamAllocator.Allocator(mgr, name, info, policy=slot_policies)

    # Hand the module level helpers over to the FSM manipulator.
    hooks = (('_startupInfo', _startupInfo),
             ('_optsFile', _optsFile),
             ('_fifoName', _fifoName))
    for attribute, hook in hooks:
        setattr(streamer.fsmManip, attribute, hook)

    streamer.recv_slots_per_node = streamer.strm_slots_per_node = 25
    streamer.showSetup()

    controller = Control.Control(mgr, name, 'Alloc', [streamer])
    ctrl = controller.run()
    return (ctrl, mgr)
def runStorage(name='Storage', sim=None, joboptions=True):
    """
    Start the Storage system stream allocator and run its control object.

    The allocator uses 2 receiving slots and 1 streaming slot per node.
    When requested, a StorageOptionsWriter is run next to the allocator
    by the same control object.

    Arguments:
    @param name        System name
    @param sim         Passed unchanged as last argument to run()
    @param joboptions  If true, add the job options writer to the controllers

    @return Tuple (result of Control.run(), result of run(name, mgr, sim))

    @author M.Frank
    """
    import Online.RunInfoClasses.Storage as RI
    import Online.AllocatorControl as Control
    import Online.Streaming.Allocator as StreamAllocator
    import Online.JobOptions.OptionsWriter as JobOptions

    mgr = _mgr(Params.storage_system_name)
    info = RI.StorageInfoCreator()

    streamer = StreamAllocator.Allocator(mgr, name, info)
    streamer.recv_slots_per_node = 2
    streamer.strm_slots_per_node = 1
    # Alternative kept from the original, currently disabled:
    # streamer.strmAllocationPolicy = StreamAllocator.RoundRobinSlotPolicy

    controllers = [streamer]
    if joboptions:
        options_writer = JobOptions.StorageOptionsWriter(mgr, name, info)
        controllers += [options_writer]

    controller = Control.Control(mgr, name, 'Alloc', controllers)
    ctrl = controller.run()
    return (ctrl, run(name, mgr, sim))
def __init__(self, manager, name, infoCreator):
    """
    Store the constructor arguments and create the helper objects.

    @param manager      Controls manager passed on to FarmInfo and StreamAlloc
    @param name         System name; also stored as datapoint name (dpName)
    @param infoCreator  Object passed on to StreamAlloc
    """
    self.manager = manager
    self.name = self.dpName = name
    # Farm description for this system.
    self.farm = FarmInfo(manager, name)
    self.infoCreator = infoCreator
    # The stream allocator is created under the name '<name>IO'.
    self.streamAlloc = StreamAlloc(manager, name + 'IO', infoCreator)
def _runMonitoring(info, name, sim, joboptions=None):
    """
    Start the Monitoring system stream allocator and run its control object.

    Arguments:
    @param info        Run info creator handed to the allocator
                       (and to the job options writer, if any)
    @param name        System name
    @param sim         Passed unchanged as last argument to run()
    @param joboptions  Optional callable; when true it is invoked as
                       joboptions(mgr, name, info) and the result is run
                       next to the allocator

    @return Tuple (result of Control.run(), result of run(name, mgr, sim))

    @author M.Frank
    """
    import Online.AllocatorControl as Control
    import Online.Streaming.Allocator as StreamAllocator

    mgr = _mgr(Params.monitor_system_name)
    controllers = [StreamAllocator.Allocator(mgr, name, info)]
    if joboptions:
        options_writer = joboptions(mgr, name, info)
        controllers += [options_writer]

    controller = Control.Control(mgr, name, 'Alloc', controllers)
    ctrl = controller.run()
    return (ctrl, run(name, mgr, sim))
class FarmSliceManager:
    """
    Allocate, configure and free slices of a reconstruction farm.

    Slices are datapoints of type 'RecoFarmSlice' named '<dpName>_Slice*'.
    Sub-farm bookkeeping is delegated to a FarmInfo object, I/O partitions
    to a StreamAlloc object created under the name '<name>IO'.

    Methods signal failure by returning None; public entry points
    additionally report the problem through error()/log().
    """

    # ===========================================================================
    def __init__(self, manager, name, infoCreator):
        """
        @param manager      Controls manager used for all datapoint access
        @param name         System name; also stored as datapoint prefix (dpName)
        @param infoCreator  Object passed on to the StreamAlloc instance
        """
        self.manager = manager
        self.name = name
        self.dpName = self.name
        self.farm = FarmInfo(self.manager, self.name)
        self.infoCreator = infoCreator
        self.streamAlloc = StreamAlloc(self.manager, self.name + 'IO',
                                       self.infoCreator)

    # ===========================================================================
    def get(self, name):
        """
        Return the datapoint '<self.name>Alloc.<name>'.

        @param name  Internal datapoint name
        @return DataPoint object built from the original name
        """
        return DataPoint(self.manager,
                         DataPoint.original(self.name + 'Alloc.' + name))

    # ===========================================================================
    def slices(self):
        """
        Read name and in-use flag of all slices of this farm.

        @return List of (name, inUse) tuples, or None if the read failed
        """
        rdr = self.manager.devReader()
        actor = PVSS.DpVectorActor(self.manager)
        names = PVSS.DpVectorActor(self.manager)
        typ = self.manager.typeMgr().type('RecoFarmSlice')
        actor.lookupOriginal(self.dpName + '_Slice*.InUse', typ)
        names.lookupOriginal(self.dpName + '_Slice*.Name', typ)
        rdr.add(actor.container)
        rdr.add(names.container)
        if rdr.execute():
            return [(names.container[j].data, actor.container[j].data)
                    for j in range(len(actor.container))]
        return None

    # ===========================================================================
    def findSlice(self, slice):
        """
        Look up a slice by name.

        @param slice  Slice name
        @return The (name, inUse) tuple of the slice, or None if it is not
                known or the slice list could not be read
        """
        # slices() returns None on a failed read; treat that as "no slices".
        for entry in self.slices() or []:
            if entry[0] == slice:
                return entry
        return None

    # ===========================================================================
    def findFreeSlice(self):
        """
        Find the first slice whose in-use flag is 0.

        @return Name of the slice, or None if there is none or the slice
                list could not be read
        """
        # slices() returns None on a failed read; treat that as "no slices".
        for entry in self.slices() or []:
            if entry[1] == 0:
                return entry[0]
        return None

    # ===========================================================================
    def makeError(self, msg):
        """Report msg through error() and return None."""
        error(msg, timestamp=1)
        return None

    # ===========================================================================
    def _activityName(self, activity):
        """Return activity without a leading 'RecoFarmActivity_' prefix."""
        prefix = 'RecoFarmActivity_'
        if activity[:len(prefix)] == prefix:
            return activity[len(prefix):]
        return activity

    # ===========================================================================
    def _free(self, slice_obj, writer=None):
        """
        Free an allocated slice from pool and return allocated subfarms
        back to the farm description.

        @param slice_obj  (name, inUse) tuple as returned by findSlice()
        @param writer     Optional device writer; a new one is created if None
        @return self on success, None if the write failed
        """
        slice_name, _ = slice_obj
        if writer is None:
            writer = self.manager.devWriter()
        self.farm.free(slice_name, writer)
        # Reset the slice datapoints: not in use, no sub-farms, no run info.
        inUse = DataPoint(self.manager,
                          DataPoint.original(slice_name + '.InUse'))
        inUse.data = 0
        writer.add(inUse)
        sf = DataPoint(self.manager,
                       DataPoint.original(slice_name + '.SubFarms'))
        sf.data = std.vector('std::string')()
        writer.add(sf)
        act = DataPoint(self.manager,
                        DataPoint.original(slice_name + '.RunInfo'))
        act.data = ''
        writer.add(act)
        if writer.execute():
            return self
        return None

    # ===========================================================================
    def _freeIO(self, slice, writer=None):
        """
        Release the I/O partition of a slice through the stream allocator.

        @param slice   Slice name (passed as both arguments of StreamAlloc.free)
        @param writer  Not used
        @return self (the result of the free call is not inspected)
        """
        self.streamAlloc.free(slice, slice)
        return self

    # ===========================================================================
    def _allocate(self, activity, slice, numSubFarms, writer=None):
        """
        Allocate a given number of subfarms from pool.

        @param activity     Activity name without 'RecoFarmActivity_' prefix
        @param slice        Name of the slice receiving the sub-farms
        @param numSubFarms  Number of sub-farms to allocate
        @param writer       See note below
        @return self on success, None on any failure
        """
        if writer is None:
            writer = self.manager.devWriter()
        farms = self.farm.getFreeSubFarms()
        if len(farms) < numSubFarms:
            log('Cannot allocate ' + str(numSubFarms) +
                ' sub-farms. Got only:' + str(len(farms)), timestamp=1)
            return None
        farms = [farms[i] for i in range(numSubFarms)]
        inUse = DataPoint(self.manager, DataPoint.original(slice + '.InUse'))
        inUse.data = 1
        # NOTE(review): a fresh writer is created here, so the writer passed
        # by the caller is never used. Kept as in the original because it is
        # not visible here whether a writer may be executed twice.
        writer = self.manager.devWriter()
        writer.add(inUse)
        sf = DataPoint(self.manager, DataPoint.original(slice + '.SubFarms'))
        sf.data = [i[0] for i in farms]
        writer.add(sf)
        act = DataPoint(self.manager, DataPoint.original(slice + '.RunInfo'))
        act.data = 'RecoFarmActivity_' + activity
        if not DataPoint.exists(act.data):
            error('The activity ' + activity + ' does not exist.', timestamp=1)
            return None
        writer.add(act)
        self.farm.allocate(slice, farms, writer)
        if writer.execute():
            log('Allocated slice:%s with %d subfarms:%s' %
                (slice, numSubFarms, [i for i in sf.data]), timestamp=1)
            return self
        return None

    # ===========================================================================
    def _allocateIO(self, rundp_name, slice, writer=None):
        """
        Allocate an I/O partition for a slice and store its name in the
        slice's '.FSMSlice' datapoint.

        @param rundp_name  Run datapoint name handed to the stream allocator
        @param slice       Slice name
        @param writer      Optional device writer; a new one is created if None
        @return self on success, None if no partition was obtained or the
                datapoint update failed
        """
        ioSlice_name = self.streamAlloc.allocate(rundp_name, slice)
        if writer is None:
            writer = self.manager.devWriter()
        if not ioSlice_name:
            log('No I/O partition availible for slice:' + slice, timestamp=1)
            return None
        ioSlice = DataPoint(self.manager,
                            DataPoint.original(slice + '.FSMSlice'))
        ioSlice.data = ioSlice_name
        writer.add(ioSlice)
        if writer.execute():
            log('Got I/O partition:' + ioSlice_name + ' for slice:' + slice,
                timestamp=1)
            return self
        # A partition was obtained, only the datapoint update failed.
        log('Failed to update I/O partition:' + ioSlice_name + ' for slice:' +
            slice, timestamp=1)
        return None

    # ===========================================================================
    def allocate(self, activity, numSubFarms):
        """
        Allocate a given number of subfarms from pool.

        @param activity     Activity name, with or without the
                            'RecoFarmActivity_' prefix
        @param numSubFarms  Number of sub-farms (converted with int())
        @return Name of the allocated slice, or None on failure
        """
        slice = self.findFreeSlice()
        numSubFarms = int(numSubFarms)
        activity = self._activityName(activity)
        # slice may be None here, hence str(); the error is reported below.
        log('Executing allocate: ' + activity + ' NSF:' + str(numSubFarms) +
            ' Slice:' + str(slice), timestamp=1)
        writer = self.manager.devWriter()
        if slice:
            rundp_name = self.manager.name() + ':' + slice
            log('Allocating slice:' + slice + ' RunDP:' + rundp_name,
                timestamp=1)
            if self._allocate(activity, slice, numSubFarms, writer):
                if self._allocateIO(rundp_name, slice, writer):
                    return slice
            return None
        return self.makeError('Failed to find free slice...')

    # ===========================================================================
    def free(self, rundp_name, slice):
        """
        Free an allocated slice from pool and return allocated subfarms
        back to the farm description.

        @param rundp_name  Run datapoint name (not used by this method)
        @param slice       Slice name
        @return The slice name on success, None on failure
        """
        slice_obj = self.findSlice(slice)
        if not slice_obj:
            return self.makeError('Failed to find slice:' + slice)
        self.streamAlloc.load()
        if not self._free(slice_obj):
            return self.makeError('Failed to write data when freeing slice ' +
                                  slice)
        if not self._freeIO(slice):
            return self.makeError('Failed to free I/O partition for slice:' +
                                  slice)
        log('Free slice:%s' % (slice), timestamp=1)
        return slice

    # ===========================================================================
    def _configureIO(self, rundp_name, slice_obj, activity, farm):
        """
        Publish the activity on the slice and configure its I/O partition.

        Writes '.Activity', '.general.partName' and '.RunInfo' of the slice,
        asks the stream allocator to configure the partition and finally
        writes '.RunInfo' of the I/O slice.

        @param rundp_name  Run datapoint name handed to the stream allocator
        @param slice_obj   (name, inUse) tuple as returned by findSlice()
        @param activity    Activity name without 'RecoFarmActivity_' prefix
        @param farm        Not used
        @return Result of StreamAlloc.configure() if everything succeeded,
                otherwise None
        """
        slice_name, _ = slice_obj
        writer = self.manager.devWriter()
        activityDp = DataPoint(self.manager,
                               DataPoint.original(slice_name + '.Activity'))
        activityDp.data = 'RecoFarmActivity_' + activity
        runInfo = DataPoint(self.manager,
                            DataPoint.original(slice_name + '.RunInfo'))
        runInfo.data = activityDp.data
        partition = DataPoint(
            self.manager, DataPoint.original(slice_name + '.general.partName'))
        partition.data = slice_name
        writer.add(activityDp)
        writer.add(partition)
        writer.add(runInfo)
        if writer.execute():
            _dp, io_slice, _id = self.streamAlloc.getPartition(slice_name)
            res = self.streamAlloc.configure(rundp_name, slice_name)
            if res:
                runInfo = DataPoint(self.manager,
                                    DataPoint.original(io_slice + '.RunInfo'))
                runInfo.data = activityDp.data
                writer.add(runInfo)
                if writer.execute():
                    return res
        return None

    # ===========================================================================
    def _configure(self, slice, tasks):
        """
        Allocate the task slots of a slice through an FSMmanip object.

        @param slice  Slice name
        @param tasks  Dictionary as returned by defineTasks()
        @return self on success, None if allocation or reset failed
        """
        start = time.time()
        self.detName = slice
        # FSMmanip is constructed while self.name holds the slice name.
        # NOTE(review): afterwards self.name is set to the fixed value 'Reco',
        # not restored to its previous value - confirm this is intended.
        self.name = slice
        fsm_manip = FSMmanip(self, '_FwFsmDevice', match='*')
        self.name = 'Reco'
        fsm_manip.optionsFile = self._optsFile
        print('....Collecting task slots.... %7.2f %d' %
              (time.time() - start, fsm_manip.writer.length()))
        fsm_manip.collectTaskSlots()
        print('....Allocating task slots.... %7.2f %d' %
              (time.time() - start, fsm_manip.writer.length()))
        for task_set in tasks.values():
            if fsm_manip.allocateProcesses(task_set) is None:
                return None
        print('....Resetting task slots..... %7.2f %d' %
              (time.time() - start, fsm_manip.writer.length()))
        result = fsm_manip.reset()
        if result is None:
            return None
        print('....All Done................. %7.2f %s' %
              (time.time() - start, str(result)))
        return self

    # ===========================================================================
    def defineTasks(self, slice_obj, activity, farms):
        """
        Build the task descriptions for all sub-farms of a slice.

        Every task is a list
        [node, name, name, type, class, dns, system, opts] where opts is the
        string '("<subfarm>","<task set>")'. Tasks of one node are kept
        together; a new task set is started when adding a node would exceed
        40 tasks in the current set.

        @param slice_obj  (name, inUse) tuple (only unpacked, not used)
        @param activity   Loaded FarmActivity object
        @param farms      Sequence of (subfarm name, index) pairs
        @return Dictionary {subfarm name: {task set name: [task, ...]}}
        """
        _slice_name, _ = slice_obj
        task_set = {}
        dns = self.manager.hostName()
        system = self.manager.name()
        cl0 = 'Class0'
        cl1 = 'Class1'
        cl2 = 'Class2'
        nset = -1  # task set counter, continues across sub-farms
        max_tasks = 40
        for subfarm_name, _index in farms:
            task_set[subfarm_name] = {}
            sf = SubFarmInfo(self.manager,
                             self.name + '_' + subfarm_name).load()
            cpus = sf.nodesCPU()  # read but unused, see ncpu below
            nodes = sf.processors()
            tasks = []
            nset = nset + 1
            set_name = 'TaskSet%02d' % nset
            opts = '("' + subfarm_name + '","' + set_name + '")'
            for i in range(nodes.size()):
                p = nodes[i]
                # Fixed number of task instances per node; the per-node value
                # cpus[i] was commented out in the original.
                ncpu = 6
                node_tasks = []
                for j in activity.farmInfrastructure.data:
                    node_tasks.append(
                        [p, p + '_' + j, p + '_' + j, j, cl0, dns, system,
                         opts])
                typ = activity.farmReceiver.data
                node_tasks.append(
                    [p, p + '_' + typ, p + '_' + typ, typ, cl1, dns, system,
                     opts])
                typ = activity.farmSender.data
                node_tasks.append(
                    [p, p + '_' + typ, p + '_' + typ, typ, cl2, dns, system,
                     opts])
                for j in range(ncpu):
                    itm = '_%02d' % j
                    for k in activity.farmTasks.data:
                        t = p + '_%s%s' % (k, itm)
                        node_tasks.append(
                            [p, t, t, k, cl1, dns, system, opts])
                if len(tasks) + len(node_tasks) <= max_tasks:
                    tasks.extend(node_tasks)
                else:
                    # Close the current set and start a new one with this node.
                    # NOTE(review): node_tasks were built with the opts string
                    # of the previous set - confirm whether that is intended.
                    task_set[subfarm_name][set_name] = tasks
                    tasks = node_tasks
                    nset = nset + 1
                    set_name = 'TaskSet%02d' % nset
                    opts = '("' + subfarm_name + '","' + set_name + '")'
            task_set[subfarm_name][set_name] = tasks
        return task_set

    # ===========================================================================
    def configure(self, rundp_name, activity):
        """
        Configure an allocated slice for an activity.

        Defines and allocates the tasks of all sub-farms of the slice and
        configures its I/O partition. If either step fails the slice is
        freed again.

        @param rundp_name  Run datapoint name; the slice name is derived
                           from it with PVSS.DataPoint.dpname()
        @param activity    Activity name, with or without the
                           'RecoFarmActivity_' prefix
        @return self on success, None on failure
        """
        log('Configuring:' + rundp_name + ' Activity:' + activity,
            timestamp=1)
        slice = PVSS.DataPoint.dpname(rundp_name)
        slice_obj = self.findSlice(slice)
        if slice_obj is None:
            return self.makeError('Failed to find slice:' + slice)
        activity = self._activityName(activity)
        act = FarmActivity(self.manager,
                           'RecoFarmActivity_' + activity).load()
        if not act:
            return self.makeError('Failed to find reconstruction activity:' +
                                  activity)
        farms = self.farm.getAllocatedSubFarms(slice)
        self.streamAlloc.load()
        act.show()
        for name, _index in farms:
            sf = SubFarmInfo(self.manager, self.name + '_' + name).load()
            sf.show()
        task_set = self.defineTasks(slice_obj, act, farms)
        # Dump the task layout. The single-argument print form produces the
        # same text under Python 2 and Python 3.
        for n, tasks in task_set.items():
            print('%s :' % (n,))
            for s, t in tasks.items():
                print('\t%s' % (s,))
                for tt in t:
                    print('\t\t\t' + str(tt).replace('[', '').replace(
                        ', ', '/').replace("'", '').replace(']', ''))
        if self._configure(slice, task_set) is None:
            self._free(slice_obj)
            return self.makeError('Failed configure slice ' + slice +
                                  ' for activity:' + activity)
        if not self._configureIO(rundp_name, slice_obj, activity, farms):
            self._free(slice_obj)
            return self.makeError('Failed configure I/O for slice ' + slice +
                                  ' and activity:' + activity)
        return self

    # ===========================================================================
    def recover(self, rundp_name, partition):
        """Recover a partition by freeing it; see free()."""
        return self.free(rundp_name, partition)

    # ===========================================================================
    def recover_slice(self, rundp_name, partition):
        """Log the request and return 'SUCCESS'; nothing is freed."""
        log('Recover ' + partition + ' runDP:' + rundp_name)
        return 'SUCCESS'  # self.free(rundp_name, partition)

    # ===========================================================================
    def _optsFile(self, name, type):
        """
        Return the options file name of a task.

        @param name  Task name
        @param type  Task type
        @return 'Brunel.opts' for tasks of type 'Brunel', else '<name>.opts'
        """
        if type == 'Brunel':
            return type + '.opts'
        return name + '.opts'

    # ===========================================================================
    def detectorName(self):
        """Return the slice name stored by the last _configure() call."""
        return self.detName