def generateJobComponents(UnitDir, UnitID, JobIndex, WLUnit, NumComponents, ComponentSize, Application):
    """
    Prepare the output directory for the components of one job.

    In:
        UnitDir        -- base directory of the workload unit; a path that does
                          not start with '/' is made absolute (against $PWD on
                          Linux, against os.getcwd() elsewhere)
        UnitID         -- name of the sub-directory of UnitDir used for this unit
        JobIndex,
        NumComponents,
        ComponentSize  -- integers; together they form the job directory name
                          "<JobIndex>-<NumComponents>-<ComponentSize>"
        WLUnit,
        Application    -- not used by the code visible in this block

    Return:
        -1 if the job directory exists but is not a directory, or if it
        cannot be created.

    Notes:
        o NOTE(review): the visible body ends once the job directory is in
          place; ComponentsList and MaxWallTime are prepared but not consumed
          here -- confirm against the complete function.
    """
    # minutes: 3, 15, 30, 700
    RunTimeInMinutesList = ["3", "15", "30", "700"]
    ComponentsList = []

    # --- generate wall time once for all components of this job
    MaxWallTime = AIRandomUtils.getRandomListElement(RunTimeInMinutesList)

    # --- make UnitDir absolute
    # startswith() (instead of UnitDir[0]) treats an empty UnitDir as relative
    # rather than raising IndexError
    if not UnitDir.startswith("/"):
        if sys.platform.find("linux") >= 0:
            # $PWD is preferred on Linux; fall back to getcwd() if it is unset
            BaseDir = os.environ.get("PWD") or os.getcwd()
        else:
            BaseDir = os.getcwd()
        UnitDir = os.path.join(BaseDir, UnitDir)

    # -- hashim's workload definition
    ComponentDirName = "%d-%d-%d" % (JobIndex, NumComponents, ComponentSize)
    FullComponentDirName = os.path.join(UnitDir, "%s/" % UnitID, ComponentDirName)

    # --- Create output directory, if it does not exist
    if os.path.exists(FullComponentDirName):
        if not os.path.isdir(FullComponentDirName):
            print("Output for job %s (%s) exists, but is not a directory ...skipping job"
                  % (JobIndex, FullComponentDirName))
            return -1
    else:
        try:
            os.makedirs(FullComponentDirName)
        except OSError as e:
            print("Cannot create output directory for job %s ...skipping job" % (JobIndex,))
            print("\tOS returned: %s" % (e,))
            return -1
def generateJobComponents( UnitDir, UnitID, JobIndex, WLUnit,
                           PreComponentsList, JavaHomeDir, IbisAppsDir, IbisLibDir,
                           JavaMaxMemoryLimitMB, Application, NCPUs ):
    """
    Prepare the output directory for the components of one IBIS job.

    In:
        UnitDir            -- base directory of the workload unit; a path that
                              does not start with '/' is made absolute (against
                              $PWD on Linux, against os.getcwd() elsewhere)
        UnitID             -- name of the sub-directory of UnitDir for this unit
        JobIndex           -- integer job counter, first part of the job
                              directory name
        PreComponentsList  -- list of pre-component descriptions; only its
                              length is used by the code visible in this block
        WLUnit, JavaHomeDir, IbisAppsDir, IbisLibDir, JavaMaxMemoryLimitMB,
        Application, NCPUs -- not used by the code visible in this block

    Return:
        None if the job directory exists but is not a directory, or if it
        cannot be created.

    Notes:
        o NOTE(review): the visible body ends once the job directory is in
          place; ComponentsList and MaxWallTime are prepared but not consumed
          here -- confirm against the complete function.
    """
    # minutes: 3, 15, 30, 700
    RunTimeInMinutesList = [ '3', '15', '30', '700' ]

    NumComponents = len(PreComponentsList)
    ComponentsList = []

    #--- generate wall time once for all components of this job
    MaxWallTime = AIRandomUtils.getRandomListElement( RunTimeInMinutesList )

    #--- make UnitDir absolute
    # startswith() (instead of UnitDir[0]) treats an empty UnitDir as relative
    # rather than raising IndexError
    if not UnitDir.startswith('/'):
        if sys.platform.find("linux") >= 0:
            # $PWD is preferred on Linux; fall back to getcwd() if it is unset
            BaseDir = os.environ.get('PWD') or os.getcwd()
        else:
            BaseDir = os.getcwd()
        UnitDir = os.path.join( BaseDir, UnitDir )

    ### THE UGLY HACK: convert /disk1/home3/koala5/grenchmark to /home/koala5/grenchmark
    pos = UnitDir.find('/koala')
    if pos >= 0:
        UnitDir = '/home' + UnitDir[pos:]

    #-- IBIS workload definition
    ComponentDirName = "%d-%d" % ( JobIndex, NumComponents )
    FullComponentDirName = os.path.join( UnitDir, "%s/" % UnitID, ComponentDirName )

    #--- Create output directory, if it does not exist
    if os.path.exists( FullComponentDirName ):
        if not os.path.isdir( FullComponentDirName ):
            print( "Output for job %s (%s) exists, but is not a directory ...skipping job"
                   % ( JobIndex, FullComponentDirName ) )
            return None
    else:
        try:
            os.makedirs( FullComponentDirName )
        except OSError as e:
            print( "Cannot create output directory for job %s ...skipping job" % ( JobIndex, ) )
            print( '\tOS returned: %s' % ( e, ) )
            return None
def ExpandApplication(Application):
    """
    Replace an ambiguous application name (containing '*') with a real name,
    randomly selected from the matching keys of the global applications
    dictionary IBIS_Apps.

    In:
        Application -- the requested name; every '*' stands for one or more
                       word characters (it is translated to the regular
                       expression '\\w+'); the rest of the name is used as
                       regular expression text as-is

    Return:
        One key of IBIS_Apps whose beginning matches the request, picked with
        AIRandomUtils.getRandomListElement.

    Raises:
        Exception -- if no application name matches the request.
    """
    #-- replace the user friendly '*' with the Python RegExp correspondent
    #   (re's syntax requires + for 1/1+ matches)
    SearchRE = re.compile(Application.replace('*', r'\w+'))

    #-- create the list of applications whose name match the request
    # match() anchors the pattern at the start of the name; the previous code
    # prepended '^' to the *searched string*, which anchored nothing and let
    # the request match anywhere inside an application name
    FullNamesList = [App for App in IBIS_Apps if SearchRE.match(App)]

    #-- select one name or die
    if not FullNamesList:
        # report the name as the caller wrote it, not the derived regexp
        raise Exception('IBIS generator: Wrong application name ' + Application + ' (cannot expand).')

    return AIRandomUtils.getRandomListElement(FullNamesList)
def generateComponent( self, bGenerateRandom = 1,
                       Size = 0, N = 0, M = 0, S = 0, C = 0, MaxWallTime = 0 ):
    """
    Generates one component using SSER as the application.

    This method does NOT write a physical JDF, but generates all the
    needed parameters, directories, and input files instead.
    The WLMain.generateWorkload is responsible for actually writing the JDFs.

    In:
        bGenerateRandom -- whether this component is to be generated randomly
                           (>0 for true, <=0 for false)
        Size, N, M, S, C, MaxWallTime -- only valid if bGenerateRandom is 0

    Return:
        Int, >=0 on success, <0 otherwise
        (the code visible in this block returns -1 when the component
        directory exists but is not a directory, or cannot be created)

    Notes:
        o Upon success, the ComponentData will contain at least the
          following fields:
            executable  -- the complete path to an executable file
            stdout      -- a file that will receive the standard output messages
            stderr      -- a file that will receive the standard error messages
            name        -- a name for this component (if unique, the better)
            description -- a description of this component
            directory   -- the directory where the component should run
            maxWallTime -- the max time requested this app should run
            arguments   -- the list of arguments to be fed to the component's application
            env         -- the list of environmental variables, as (Name, Value) tuples
            stagein     -- a list of files to be staged in
            stageout    -- a list of files to be staged out
        o NOTE(review): the visible body ends once the component directory is
          in place; the fields above are not filled in by the code shown
          here -- confirm against the complete method.
    """
    if bGenerateRandom > 0:
        if self.HaveInput > 0:
            InputSize = AIRandomUtils.getRandomListElement( SSERComponent.SSER_ParKSuperSizes )
        else:
            InputSize = 0

        if self.HaveOutput > 0:
            OutputSize = AIRandomUtils.getRandomListElement( SSERComponent.SSER_ParKSuperSizes )
        else:
            OutputSize = 0

        N = AIRandomUtils.getRandomListElement( SSERComponent.SSER_ParSupersteps )
        M = AIRandomUtils.getRandomListElement( SSERComponent.SSER_ParMemoryKItems )
        S = AIRandomUtils.getRandomListElement( SSERComponent.SSER_ParMemoryElementsPerItem )
        C = AIRandomUtils.getRandomListElement( SSERComponent.SSER_ParComputationPerMemoryItem )
        # alternative, partly fixed parameter selection (disabled):
        # N = SSERComponent.SSER_ParSupersteps[2]
        # M = AIRandomUtils.getRandomListElement( SSERComponent.SSER_ParMemoryKItems )
        # S = AIRandomUtils.getRandomListElement( SSERComponent.SSER_ParMemoryElementsPerItem )
        # C = SSERComponent.SSER_ParComputationPerMemoryItem[3]
        MaxWallTime = AIRandomUtils.getRandomListElement( SSERComponent.SSER_RunTimeInMinutes )

    #--- make self.UnitDir absolute (the attribute itself is updated)
    # startswith() (instead of UnitDir[0]) treats an empty UnitDir as relative
    # rather than raising IndexError
    if not self.UnitDir.startswith('/'):
        if sys.platform.find("linux") >= 0:
            # $PWD is preferred on Linux; fall back to getcwd() if it is unset
            BaseDir = os.environ.get('PWD') or os.getcwd()
        else:
            BaseDir = os.getcwd()
        self.UnitDir = os.path.join( BaseDir, self.UnitDir )

    ## too long component dir name
    #ComponentDirName = "%s_sser_%dx_%d_i%d_o%d" % \
    #    (self.ComponentData['id'], self.ComponentData['count'], int(N), int(InputSize), int(OutputSize))
    ComponentDirName = "%s_sser" % self.ComponentData['id']
    FullComponentDirName = os.path.join( self.UnitDir, ComponentDirName )

    #--- Create output directory, if it does not exist
    if os.path.exists( FullComponentDirName ):
        if not os.path.isdir( FullComponentDirName ):
            print( "Output for job %s (%s) exists, but is not a directory ...skipping job"
                   % ( self.ComponentData['id'], FullComponentDirName ) )
            return -1
    else:
        try:
            os.makedirs( FullComponentDirName )
        except OSError as e:
            print( "Cannot create output directory for job %s ...skipping job"
                   % ( self.ComponentData['id'], ) )
            print( '\tOS returned: %s' % ( e, ) )
            return -1
def generateWorkloadUnit( UnitDir, UnitID, WLUnit, SubmitDurationMS, bGenerateRandom = 1 ):
    """
    Generate the jobs of one SMPI1 workload unit.

    In:
        UnitDir, UnitID  -- passed through to generateJobInfo and
                            generateJobComponents
        WLUnit           -- the workload unit description; must contain the
                            keys 'otherinfo' (a dictionary of generator
                            settings) and 'multiplicity'; 'FirstJobIndex' is
                            optional. The key 'generatedjobs' is set by this
                            function.
        SubmitDurationMS -- passed through to generateJobInfo
        bGenerateRandom  -- >0 to generate WLUnit['multiplicity'] random jobs;
                            otherwise the (unfinished) exhaustive generation
                            is selected

    Out:
        UnitsDic
            o A dictionary with keys 'info' and 'jobs'.

    Raises:
        Exception -- if WLUnit has no 'otherinfo', if the mandatory
                     'NComponentsWithWeights' or 'TotalCPUsWithWeights'
                     settings are missing, or if exhaustive generation is
                     requested with non-empty parameter lists (not
                     implemented).

    Notes:
        o UnitsDic['info'] contains a dictionary of jobs info,
          indexed with an integer counter
        o Each job info contains at least the keys
          name, description, jdf, submitCommand, runTime
        o UnitsDic['jobs'] contains a dictionary of jobs data,
          indexed with an integer counter
        o Each job data contains the list of components of that job
        o Each component in the list is a dictionary with at least the
          following fields: executable, stdout, stderr, name, description,
          directory, maxWallTime, arguments, env, stagein, stageout
        o The SMPI1Component class attributes (SMPI1_Exe, SMPI1_Par*,
          SMPI1_RunTimeInMinutes) and the global SMPI1_LastRunTime are
          (re)set on every call.

    See also:
        utils/WLDocHandlers.WLSubmitJobKeys
        this_file.SMPI1Component.generateComponent
    """
    global SMPI1_LastRunTime

    #-- defaults, possibly overridden by the OtherInfo settings below
    SMPI1Component.SMPI1_Exe = os.path.join( "/tmp", "smpi1t" )
    SMPI1Component.SMPI1_ParKSizes = [ "0", "32", "128", "512", "1024" ]
    SMPI1Component.SMPI1_ParKSuperSizes = [ "0", "16", "32", "64", "128", "256" ]
    SMPI1Component.SMPI1_ParSupersteps = [ "1", "2", "5", "10", "20", "50", "100" ]
    SMPI1Component.SMPI1_ParMemoryKItems = [ "10", "25", "50", "100", "250", "500", "1000", "5000" ]
    SMPI1Component.SMPI1_ParMemoryElementsPerItem = [ "3", "4", "10", "100", "500", "1000" ]
    SMPI1Component.SMPI1_ParXChangeElementsPerStep = [ "100", "500", "1000", "10000", "50000" ]
    SMPI1Component.SMPI1_ParComputationPerMemoryItem = [ "2", "10", "100", "1000" ]
    SMPI1Component.SMPI1_RunTimeInMinutes = [ "2", "5", "10", "15" ]

    if 'otherinfo' not in WLUnit:
        raise Exception( "SMPI1 generator: No OtherInfo data! Expected at least ComponentSize,NJobs,NComponents fields." )
    OtherInfo = WLUnit['otherinfo']

    #-- optional overrides of the SMPI1Component defaults
    try:
        SMPI1Component.SMPI1_Exe = OtherInfo['Exe']
    except KeyError:
        pass

    # each OtherInfo key <Key> overrides the class attribute SMPI1_<Key>
    for Key in ( 'ParKSizes', 'ParKSuperSizes', 'ParSupersteps', 'ParMemoryKItems',
                 'ParMemoryElementsPerItem', 'ParXChangeElementsPerStep',
                 'ParComputationPerMemoryItem', 'RunTimeInMinutes' ):
        try:
            OneIntList = AIParseUtils.readIntList( Text = OtherInfo[Key], ItemSeparator = ',' )
        except KeyError:
            continue
        setattr( SMPI1Component, 'SMPI1_' + Key, OneIntList )

    #-- mandatory settings
    try:
        NComponentsWithWeightsDic = \
            AIParseUtils.readIntWithWeightsList( OtherInfo['NComponentsWithWeights'], DefaultWeight = 1.0 )
    except KeyError:
        raise Exception( "SMPI1 generator: Invalid NComponentsWithWeights info specified in the OtherInfo field." )

    try:
        TotalCPUsWithWeightsDic = \
            AIParseUtils.readIntWithWeightsList( OtherInfo['TotalCPUsWithWeights'], DefaultWeight = 1.0 )
    except KeyError:
        raise Exception( "SMPI1 generator: Invalid TotalCPUsWithWeights info specified in the OtherInfo field." )

    #-- optional settings, with defaults
    try:
        MinComponentSize = AIParseUtils.readInt( OtherInfo['MinComponentSize'], DefaultValue = 0 )
    except KeyError:
        MinComponentSize = 1

    try:
        MaxComponentSize = AIParseUtils.readInt( OtherInfo['MaxComponentSize'], DefaultValue = 0 )
    except KeyError:
        MaxComponentSize = 10000000

    try:
        # read the setting under its own name (the key 'SMPI1AppsDir' was
        # used here before, so the setting could never be supplied)
        EqualCPUsPerComponent = AIParseUtils.readBoolean( OtherInfo['EqualCPUsPerComponent'] )
    except KeyError:
        EqualCPUsPerComponent = 0

    try:
        AppBaseDir = OtherInfo['AppBaseDir']
    except KeyError:
        AppBaseDir = '/tmp'
        print( "WARNING! SMPI1 generator: No AppBaseDir info specified in the OtherInfo field. Assuming default: %s"
               % ( AppBaseDir, ) )

    try:
        Application = OtherInfo['Application']
    except KeyError:
        Application = 'smpi1t-gm'
        print( "WARNING! SMPI1 generator: No Application info specified in the OtherInfo field. Assuming default: %s"
               % ( Application, ) )

    try:
        Submitter = OtherInfo['Submitter']
    except KeyError:
        Submitter = 'krunner -l DEBUG -g -e -o -f ${JDF}'
        print( "WARNING! SMPI1 generator: No Submitter info specified in the OtherInfo field. Assuming default %s"
               % ( Submitter, ) )

    try:
        SiteTypesWithWeightsDic = \
            AIParseUtils.readStringWithWeightsList( OtherInfo['SiteTypesWithWeights'], DefaultWeight = 1.0 )
    except KeyError:
        # the default must be bound to the name used below, and the warning
        # must not reference names that do not exist in this function
        SiteTypesWithWeightsDic = { AIParseUtils.VALUES:[], AIParseUtils.TOTAL_WEIGHT:0.0 }
        print( "WARNING! SMPI1 generator: No SiteTypesWithWeights info specified in the OtherInfo field.\n\tSetting to default %s"
               % ( SiteTypesWithWeightsDic[AIParseUtils.VALUES], ) )

    try:
        SitesWithWeightsDic = \
            AIParseUtils.readStringWithWeightsList( OtherInfo['SitesWithWeights'], DefaultWeight = 1.0 )
    except KeyError:
        SitesWithWeightsDic = { AIParseUtils.VALUES:[], AIParseUtils.TOTAL_WEIGHT:0.0 }
        print( "WARNING! SMPI1 generator: No SitesWithWeights info specified in the OtherInfo field.\n\tSetting to default %s"
               % ( SitesWithWeightsDic[AIParseUtils.VALUES], ) )

    UnitsDic = {}
    UnitsDic['info'] = {}
    UnitsDic['jobs'] = {}

    #-- init start time
    try:
        StartAt = AIParseUtils.readInt( OtherInfo['StartAt'], DefaultValue = 0 )
        StartAt = StartAt * 1000 # the time is given in seconds
    except KeyError:
        StartAt = 0
    SMPI1_LastRunTime = StartAt

    if 'FirstJobIndex' in WLUnit:
        JobIndex = int(WLUnit['FirstJobIndex'])
    else:
        JobIndex = 0

    WLUnitMultiplicity = int(WLUnit['multiplicity'])

    if bGenerateRandom > 0:

        index = 0
        # NOTE(review): a job whose restrictions can never be met is retried
        # forever (see the 'continue' after the inner loop)
        while index < WLUnitMultiplicity:

            # 1. generate a random site type (unordered or ordered)
            SiteType = AIRandomUtils.getRandomWeightedListElement(
                           SiteTypesWithWeightsDic,
                           ValuesKey = AIParseUtils.VALUES,
                           TotalWeightKey = AIParseUtils.TOTAL_WEIGHT )

            # 2. select the number of CPU for this job
            #TODO: repeat NCPUs selection until App can run on that many CPUs
            NCPUs = AIRandomUtils.getRandomWeightedListElement(
                        TotalCPUsWithWeightsDic,
                        ValuesKey = AIParseUtils.VALUES,
                        TotalWeightKey = AIParseUtils.TOTAL_WEIGHT )

            # 3. select the number of components and try to create subcomponents,
            #    until successfully matching restrictions
            WhileCondition = 1
            NTries = 100
            while WhileCondition != 0 and NTries > 0:
                NTries = NTries - 1

                NComponents = AIRandomUtils.getRandomWeightedListElement(
                                  NComponentsWithWeightsDic,
                                  ValuesKey = AIParseUtils.VALUES,
                                  TotalWeightKey = AIParseUtils.TOTAL_WEIGHT )
                # impossible match, just retry; a non-positive component
                # count would also divide by zero below
                if NComponents <= 0 or NComponents > NCPUs:
                    continue

                # explicit integer division (CPUs are not fractional)
                NCPUsPerComponent = NCPUs // NComponents
                # ensure restrictions are met
                if NCPUsPerComponent < MinComponentSize or NCPUsPerComponent > MaxComponentSize:
                    continue

                # CPUs left over after the equal split
                NRemainingComponents = NCPUs - NCPUsPerComponent * NComponents
                if NRemainingComponents > 0 and EqualCPUsPerComponent == 1:
                    continue
                # >= -> including the extra CPU for some components
                if NRemainingComponents > 0 and NCPUsPerComponent == MaxComponentSize:
                    continue

                PreComponentsList = []
                for ComponentID in range(NComponents):
                    # cpu count
                    ToAssignHere = NCPUsPerComponent
                    if ComponentID < NRemainingComponents:
                        # assign the extra CPUs, round-robin
                        ToAssignHere = ToAssignHere + 1
                    # site location
                    if SiteType == 'ordered':
                        Site = AIRandomUtils.getRandomWeightedListElement(
                                   SitesWithWeightsDic,
                                   ValuesKey = AIParseUtils.VALUES,
                                   TotalWeightKey = AIParseUtils.TOTAL_WEIGHT )
                    else:
                        Site = '*'
                    # create and add pre-component information
                    PreComponentsList.append( { 'id': ComponentID,
                                                'count': ToAssignHere,
                                                'location': Site } )

                WhileCondition = 0

            if WhileCondition == 1: # failed to generate in 100 tries
                continue # try again for the same job

            #--- generate rnd job
            #-- generate job info
            UnitsDic['info'][index] = \
                generateJobInfo( UnitDir, UnitID, JobIndex,
                                 WLUnit, SubmitDurationMS, Submitter )
            #-- generate job components
            UnitsDic['jobs'][index] = \
                generateJobComponents( UnitDir, UnitID, JobIndex, WLUnit, PreComponentsList,
                                       Application, AppBaseDir,
                                       bGenerateRandom )

            index = index + 1
            JobIndex = JobIndex + 1

        WLUnit['generatedjobs'] = index

    else:

        #-- exhaustive generation over all parameter combinations is not
        #   implemented: it fails as soon as there is at least one
        #   combination, i.e. when none of the parameter lists is empty.
        #   TODO: for every combination, pick a wall time and a site type,
        #   then generate the job info and the job components.
        ParameterLists = ( SMPI1Component.SMPI1_ParKSuperSizes,
                           SMPI1Component.SMPI1_ParSupersteps,
                           SMPI1Component.SMPI1_ParMemoryKItems,
                           SMPI1Component.SMPI1_ParMemoryElementsPerItem,
                           SMPI1Component.SMPI1_ParXChangeElementsPerStep,
                           SMPI1Component.SMPI1_ParComputationPerMemoryItem )
        HaveCombinations = True
        for OneList in ParameterLists:
            if not OneList:
                HaveCombinations = False
        if HaveCombinations:
            raise Exception( "ERROR! SMPI1 is not finished!" )

        #-- set generated jobs
        index = 0
        WLUnit['generatedjobs'] = index

    return UnitsDic
AppJARFullPath ] ) #ParamsList StageIn StageOut MaxReqMemory Description if Application not in IBIS_Apps.keys(): raise Exception, 'IBIS generator: Application ' + Application + 'not known!' AppMemoryLimit = IBIS_Apps[Application][IBISConsts.MaxReqMemory] if JavaMaxMemoryLimitMB < AppMemoryLimit: print "WARNING! IBIS generator:", "Although the application specific JVM memory limit was", AppMemoryLimit, "the JavaMaxMemoryLimitMB field forced it to", JavaMaxMemoryLimitMB AppMemoryLimit = JavaMaxMemoryLimitMB #-- need the same parameters for all the data ParamsData = IBIS_Apps[Application][IBISConsts.ParamsList] if type(ParamsData) == list: ParamsData = AIRandomUtils.getRandomListElement( ParamsData ) for PreComponent in PreComponentsList: #--- reset component data ComponentData = {} #--- generate component data ComponentData['id'] = "%s-%d-%2.2d" % ( UnitID, JobIndex, PreComponent['id'] ) ComponentData['name'] = "%s_ibis" % ComponentData['id'] ComponentData['location'] = PreComponent['location'] ##ComponentData['jobtype'] = 'mpi' ComponentData['count'] = PreComponent['count'] ComponentData['description'] = \ "Ibis Workload Job, Type=%s Count=%d, maxWallTime=%d" % \ (Application, ComponentData['count'], int(MaxWallTime) ) ComponentData['directory'] = FullComponentDirName
try: StartAt = AIParseUtils.readInt(WLUnit["otherinfo"]["StartAt"], DefaultValue=0) StartAt = StartAt * 1000 # the time is given is seconds except KeyError: StartAt = 0 HSH_LastRunTime = StartAt WLUnitMultiplicity = int(WLUnit["multiplicity"]) if bGenerateRandom > 0: # --- generate random JobIndex = 0 if WLUnitMultiplicity > 1: NJobs = NJobs * WLUnitMultiplicity while JobIndex < NJobs: ComponentSize = AIRandomUtils.getRandomListElement(ComponentSizeList) NumComponents = AIRandomUtils.getRandomListElement(NumComponentsList) CurrentUnitID = "%s-%d" % (UnitID, JobIndex % WLUnitMultiplicity) UnitsDic["info"][JobIndex] = generateJobInfo( UnitDir, CurrentUnitID, JobIndex, WLUnit, SubmitDurationMS, Submitter ) UnitsDic["jobs"][JobIndex] = generateJobComponents( UnitDir, CurrentUnitID, JobIndex, WLUnit, NumComponents, ComponentSize, Application ) JobIndex = JobIndex + 1 WLUnit["generatedjobs"] = JobIndex else: print "Complete workload" # --- generate all