Code example #1
File: SSjobrunner.py Project: thu-pacman/Uberun
 def __init__(self, hostname, jobspec, name='JobRunner'):
     super().__init__()
     self.jobspec = jobspec
     self.hostname = hostname
     self.logger = SSLogger(name)
     # results for parent
     self.returns = dict()
Code example #2
 def __init__(self, algoname='CE', alpha=0.9):
     self.MIN_DAEMONS = 8
     self.net = SSMasterNetwork()
     self.db = SSDatabase(algorithm=algoname)
     self.sched = SSScheduler(algoname=algoname, database=self.db)
     self.default_alpha = alpha
     self.prtl = SSProtocol()
     self.logger = SSLogger('Master')
     self.parser = SSParser()
     self.users = []
     self.daemons = []
Code example #3
 def __init__(self, algoname, database):
     self.logger = SSLogger('Scheduler')
     if algoname == 'CE':
         self.algo = SSCEAlgorithm()
     elif algoname == 'CS':
         self.algo = SSCSAlgorithm()
     elif algoname == 'SS':
         self.algo = SSSSAlgorithm()
     else:
         self.logger.error('No such algorithm, use CE/CS/SS.')
     self.db = database
     self.logger.succ('Algorithm %s used for resource allocation' %
                      self.algo.name)
Code example #4
 def __init__(self, alg='CE'):
     self.MIN_DAEMONS = 1
     self.clock = SimulationClock()
     self.db = SSDatabase(algorithm=alg,
                          simulationClock=self.clock,
                          logToFile=False)
     self.sched = SSScheduler(algoname=alg, database=self.db)
     self.logger = SSLogger('Simulator')
     self.parser = SSParser()
     self.users = []
     self.daemons = []
     self.trace = []
     self.pendingJobs = dict()
     self.runningJobs = dict()
Code example #5
class SSScheduler:
    def __init__(self, algoname, database):
        self.logger = SSLogger('Scheduler')
        if algoname == 'CE':
            self.algo = SSCEAlgorithm()
        elif algoname == 'CS':
            self.algo = SSCSAlgorithm()
        elif algoname == 'SS':
            self.algo = SSSSAlgorithm()
        else:
            self.logger.error('No such algorithm, use CE/CS/SS.')
        self.db = database
        self.logger.succ('Algorithm %s used for resource allocation' %
                         self.algo.name)

    # the scheduling algorithm happens here
    # return the allocation (daemon -> jobspec pairs) and the estimated wall time
    def nextJob(self):
        # no node or no job, cannot schedule
        if len(self.db.pendingJobs) and len(self.db.cluster.nodes):
            jobid = self.db.mostPriorJob()
            # (parallelism, alpha, dict(scale->{time, ipcs, mbws}))
            profile = self.db.getProfile(jobid)
            # the scheduling algorithm decides the order to try different scales
            # or may only try part of them (CE only tries 1x, E)
            # the data structure of a candidate is the same as the profile
            candidates = self.algo.sortCandidates(profile)
            # self.logger.echo(candidates)
            # try to allocate for each scale, if success, break
            allocation, est = None, None
            for parallelism, scale, mode, alpha, ipcs, mbws, toprofile in candidates:
                N, C, W, B = self.algo.calculateResourceDemand(
                    parallelism, scale, mode, alpha, ipcs, mbws)
                if N <= 0:  # N<=0 means not feasible
                    continue
                # resource allocation; None if not available
                # allocation is a list of (daemon, jobspec) pairs (see Protocol)
                allocation = self.db.allocateFor(jobid, N, C, W, B, scale,
                                                 mode, toprofile)
                if allocation:
                    #self.logger.echo(candidates)
                    est = self.algo.estimate(profile, scale, W)
                    self.db.jobStart(jobid, est)
                    break
            if not allocation:
                self.db.jobStuck(jobid)
            return (allocation, est)
        return (None, None)
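
The scheduler relies on two hooks of the algorithm object: sortCandidates, which yields tuples of (parallelism, scale, mode, alpha, ipcs, mbws, toprofile), and calculateResourceDemand, which turns one candidate into a per-node (N, C, W, B) demand. Below is a minimal sketch of that contract with a made-up single-candidate policy; the class name, the cluster shape, and all numbers are illustrative assumptions, not part of Uberun.

# Hypothetical sketch of the algorithm contract consumed by SSScheduler.nextJob.
class FakeAlgorithm:
    name = 'FAKE'

    def sortCandidates(self, profile):
        # profile is (parallelism, alpha, {scale: {'time', 'ipcs', 'mbws'}} or None)
        parallelism, alpha, scales = profile
        data = (scales or {}).get(1)      # only try the 1x scale, as CE is said to do
        toprofile = data is None          # profile the program if no data exists yet
        ipcs = data['ipcs'] if data else None
        mbws = data['mbws'] if data else None
        return [(parallelism, 1, 'E', alpha, ipcs, mbws, toprofile)]

    def calculateResourceDemand(self, parallelism, scale, mode, alpha, ipcs, mbws):
        cores_per_node = 16                          # assumed cluster shape
        N = max(1, parallelism // cores_per_node)    # nodes
        C = min(parallelism, cores_per_node)         # cores per node
        W = 20                                       # all LLC ways
        B = 0                                        # no bandwidth reservation
        return N, C, W, B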
Code example #6
File: SSdatabase.py Project: thu-pacman/Uberun
    def __init__(self, algorithm, simulationClock=None, logToFile=True):
        # the file used to store the job history, in JSON format
        self.logToFile = logToFile
        if self.logToFile:
            self.historyFilename = 'JobLogs/%s_%s_%s_%s.txt' % (
                CFG.DB['history_prefix'], 'sim' if simulationClock else 'run',
                algorithm, datetime.utcnow().strftime('%Y%m%d-%H%M%S'))
        # the file used to store profiles, in JSON format
        self.profileFilename = CFG.DB['profile_fname']
        # a simulated clock for simulation
        self.simulationClock = simulationClock

        self.jobToReq = dict()
        self.logger = SSLogger('Database')
        self.cluster = SSCluster()
        # Job id, inc by one
        self.jobid = 0
        # jobid -> jobattr
        # jobattr is a dict, e.g.,
        #   'jobname': 'MG16'  used to specify the executable binary
        #   'framework': 'MPI' used to build the running command
        #   'parallelism': 16  how many cores are needed
        #   'alpha': a factor indicating the tolerable performance loss
        self.jobidToJobattr = dict()
        # record the resources a job is using
        self.jobidToResource = dict()
        # record what daemons a jobid is running on
        self.jobidToDaemons = dict()
        # record the returns of a job
        self.jobidToReturns = dict()
        # a priority criterion used for scheduling
        # jobid -> (current priority, stride, last check timestamp)
        self.jobidToPriority = dict()
        # profile data for programs
        # a program is the executable binary of a job; TODO: a more accurate signature
        # currently, we use the jobname 'MG16' as the program signature
        # self.progToProfile['MG16'] is a dict
        # profile[scale factor] = {'time': execution time, 'ipcs': ipc-ways curve, 'mbws': membw-ways curve}
        self.progToProfile = dict()
        self.loadProfileFromFile()
        # three lists: pending, running, finished
        self.pendingJobs = []
        self.runningJobs = []
        self.completedJobs = []
        # a container to store job history (submit/start/finish/allocation)
        # keyed by jobid
        self.history = dict()
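
The comments above imply roughly the following shapes for a jobattr and a profile entry; the concrete names and numbers in this sketch are illustrative, not measured data.

# Illustrative jobattr matching the comments above (values are made up).
jobattr = {
    'jobname': 'mg-16',    # selects the executable binary
    'framework': 'MPI',    # used to build the launch command
    'parallelism': 16,     # how many cores are needed
    'alpha': 0.9,          # tolerable performance-loss factor
}

# Illustrative profile entry: progToProfile['mg-16'][scale] holds the execution
# time plus per-LLC-way IPC and memory-bandwidth curves (index = number of ways).
profile_entry = {
    'time': 123.4,
    'ipcs': [-1, 0.8, 1.1, 1.3],
    'mbws': [-1, 5.2, 6.0, 6.4],
}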
Code example #7
File: SSdaemon.py Project: thu-pacman/Uberun
 def __init__(self):
     self.net = SSWorkerNetwork()
     self.prtl = SSProtocol()
     self.logger = SSLogger('Daemon')
     #self.msgLock = threading.Lock() 
     self.jobrunners = []
     #self.profiler = None
     time.sleep(1) # if we don't wait, the connect fails; reason unknown
     self.net.sendObj(self.prtl.greeting('daemon', self.net.hostname)) # I am a daemon
Code example #8
File: SSnetwork.py Project: thu-pacman/Uberun
    def __init__(self, mode='worker'):
        self.logger = SSLogger('Network', info=False, echo=False)
        self.hostname = socket.gethostname()
        # mode: master or worker
        self.mode = mode
        # selector used for non-blocking IO
        self.sel = selectors.DefaultSelector()
        # connections, client -> connection
        self.connections = dict()
        # obj buffer for each connection, connection -> object list
        # buf[0] is the trailing string (without a '#' end flag)
        # buf[1] and after are completed commands
        self.objectBuffer = dict()
        # constant values
        self.EOC = CFG.NET['eoc']
        self.CONNECTION_BROKEN = CFG.NET['broken_conn_str']
        self.NEW_CONNECTION = CFG.NET['new_conn_str']
        self.SS_MASTER = socket.gethostbyname(CFG.NET['master_hostname']) 
        self.SS_PORT = CFG.NET['master_port']
        self.BACK_LOG = CFG.NET['master_backlog']

        # master and worker
        # master connects to all workers,
        # worker only connects to master, no inter-worker connections
        if mode == 'master': # master
            lsock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # IPV4 and TCP
            lsock.bind(('', self.SS_PORT)) # accept from any
            lsock.listen(self.BACK_LOG)
            # use the selector for non-blocking IO; only check READ, assume always writable
            lsock.setblocking(False)
            self.sel.register(lsock, selectors.EVENT_READ, data=self.NEW_CONNECTION) 
            self.logger.info('Master started on %s' % socket.gethostname())
        else: # worker, both daemons and user frontends
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.setblocking(False)
            sock.connect_ex((self.SS_MASTER, self.SS_PORT))
            #data = types.SimpleNamespace(workerName=socket.gethostname())
            #workerName = socket.gethostname()
            self.sel.register(sock, selectors.EVENT_READ, data='master')
            self.connections['master'] = sock # only connects to master
            self.objectBuffer[sock] = ['']
            self.logger.info('Daemon started on %s' % socket.gethostname())
Code example #9
 def __init__(self, name):
     self.name = name
     self.total_cores = CFG.CLUSTER['core_per_node']
     self.total_ways = CFG.CLUSTER['llcway_per_node']
     self.total_membw = CFG.CLUSTER['membw_per_node']
     self.logger = SSLogger(name='Algorithm')
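
This snippet, like most of the others, reads its constants from a CFG module. The stand-in below only shows the key names the examples rely on; every concrete value (and the master hostname) is an assumption, not taken from Uberun.

# Hypothetical CFG stand-in; key names come from the snippets, values are assumed.
CLUSTER = {'core_per_node': 16, 'llcway_per_node': 20, 'membw_per_node': 60.0}
NET = {
    'eoc': '#',                   # end-of-command marker
    'broken_conn_str': 'CONN_BROKEN',
    'new_conn_str': 'NEW_CONN',
    'master_hostname': 'bic01',   # assumed master node name
    'master_port': 23333,
    'master_backlog': 64,
}
DB = {
    'history_prefix': 'history',
    'profile_fname': 'profile.txt',
    'default_stride': 1.0,
    'slow_stride': 0.1,
}
PROF = {'sample_ways': [1, 5, 10, 20]}
RUN = {'deploy_path': './', 'exe_path': {}, 'exe_dir': {}}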
Code example #10
File: SSdatabase.py Project: thu-pacman/Uberun
class SSDatabase:
    def __init__(self, algorithm, simulationClock=None, logToFile=True):
        # the file used to store the job history, in JSON format
        self.logToFile = logToFile
        if self.logToFile:
            self.historyFilename = 'JobLogs/%s_%s_%s_%s.txt' % (
                CFG.DB['history_prefix'], 'sim' if simulationClock else 'run',
                algorithm, datetime.utcnow().strftime('%Y%m%d-%H%M%S'))
        # the file used to store profiles, in JSON format
        self.profileFilename = CFG.DB['profile_fname']
        # a simulated clock for simulation
        self.simulationClock = simulationClock

        self.jobToReq = dict()
        self.logger = SSLogger('Database')
        self.cluster = SSCluster()
        # Job id, inc by one
        self.jobid = 0
        # jobid -> jobattr
        # jobattr is a dict, e.g.,
        #   'jobname': 'MG16'  used to specify the executable binary
        #   'framework': 'MPI' used to build the running command
        #   'parallelism': 16  how many cores are needed
        #   'alpha': a factor indicating the tolerable performance loss
        self.jobidToJobattr = dict()
        # record the resources a job is using
        self.jobidToResource = dict()
        # record what daemons a jobid is running on
        self.jobidToDaemons = dict()
        # record the returns of a job
        self.jobidToReturns = dict()
        # a priority criterion used for scheduling
        # jobid -> (current priority, stride, last check timestamp)
        self.jobidToPriority = dict()
        # profile data for programs
        # a program is the executable binary of a job; TODO: a more accurate signature
        # currently, we use the jobname 'MG16' as the program signature
        # self.progToProfile['MG16'] is a dict
        # profile[scale factor] = {'time': execution time, 'ipcs': ipc-ways curve, 'mbws': membw-ways curve}
        self.progToProfile = dict()
        self.loadProfileFromFile()
        # three lists: pending, running, finished
        self.pendingJobs = []
        self.runningJobs = []
        self.completedJobs = []
        # a container to store job history (submit/start/finish/allocation)
        # keyed by jobid
        self.history = dict()

    def loadProfileFromFile(self):
        # if no such file, create one
        if not os.path.exists(self.profileFilename):
            with open(self.profileFilename, 'w+') as fw:
                fw.write('')
        # in the file, each line is a dict
        # {'prog': prog, 'scale': scale, 'value': value}
        # self.progToProfile[prog][scale] = value
        cnt = 0
        with open(self.profileFilename, 'r') as fr:
            for line in fr.readlines():
                if len(line.strip()) == 0:
                    continue
                kv = json.loads(line)
                cnt += 1
                prog, scale, value = kv['prog'], kv['scale'], kv['value']
                if prog not in self.progToProfile:
                    self.progToProfile[prog] = dict()
                self.progToProfile[prog][scale] = value
        self.logger.info('Profile Loaded, %d Entries in total.' % cnt)

    def getTimestampNow(self):
        if self.simulationClock:
            return self.simulationClock.now()
        else:
            return datetime.utcnow().timestamp()

    def addDaemon(self, daemon, hostname):
        self.cluster.addNode(daemon, hostname)
        self.logger.debug('New daemon:', daemon, 'at', hostname)

    def addUserJob(self, job):
        jobid = self.jobid
        self.logger.debug('Job added [%d]: %s' % (jobid, job))
        self.jobidToJobattr[jobid] = job
        # add to the pending list
        self.pendingJobs.append(jobid)
        self.jobidToPriority[jobid] = {
            'value': 0,
            'stride': CFG.DB['default_stride'],
            'lastcheck': self.getTimestampNow()
        }
        # record the submit time
        self.history[self.jobid] = {
            'submitTime': self.getTimestampNow(),
            'jobattr': job
        }
        self.jobid += 1
        return jobid

    def jobStart(self, jobid, est=-1):
        self.cluster.resourceAlloc(self.jobidToResource[jobid], jobid)
        self.jobidToDaemons[jobid] = [
            x for x, _, _ in self.jobidToResource[jobid]
        ]
        self.jobidToReturns[jobid] = []
        #self.logger.debug(self.jobidToDaemons)
        self.pendingJobs.remove(jobid)
        self.runningJobs.append(jobid)
        # restore all priority strides
        for _, p in self.jobidToPriority.items():
            p['stride'] = CFG.DB['default_stride']
        self.history[jobid]['startTime'] = self.getTimestampNow()
        self.history[jobid]['estTime'] = est
        self.logger.info(
            'job [%d] (%s) starts, scale %d, resource req:' %
            (jobid, self.jobidToJobattr[jobid]['jobname'],
             self.history[jobid]['scale']), self.history[jobid]['NCWB'],
            ', on nodes:', self.history[jobid]['nodelist'],
            'NewProfiling' if self.history[jobid]['toprofile'] else 'InDB')

    def jobFinish(self, jobid):
        # record the end time
        self.history[jobid]['finishTime'] = self.getTimestampNow()
        jobtime = int(100 * (self.history[jobid]['finishTime'] -
                             self.history[jobid]['startTime'])) / 100
        # all returns from all daemons
        returns = self.jobidToReturns[jobid]
        # check exitcode, should be 0 for all
        exitcode = 0
        for ret in returns:
            ec = ret.get('exitcode', 0)
            if ec != 0:
                exitcode = ec
                break
        # if there is an estimation (est_time, est_speedup), use the est_time
        est = self.history[jobid]['estTime'][0] if self.history[jobid][
            'estTime'] else -1
        if exitcode != 0:
            self.logger.error(
                'job [%d] (%s) finishes after %.2f seconds (%.2f est), with exitcode %d'
                % (jobid, self.jobidToJobattr[jobid]['jobname'], jobtime, est,
                   exitcode))
        else:
            self.logger.info(
                'job [%d] (%s) finishes after %.2f seconds (%.2f est), with exitcode %d'
                % (jobid, self.jobidToJobattr[jobid]['jobname'], jobtime, est,
                   exitcode))
        # log the execution record
        if self.logToFile:
            with open(self.historyFilename, 'a') as fw:
                fw.write('JOBID %5d: %s\n' %
                         (jobid, json.dumps(self.history[jobid])))
        # update the profile
        if self.history[jobid]['toprofile']:
            scale = self.history[jobid]['scale']
            prog = self.jobidToJobattr[jobid]['jobname']
            # profile[scale factor] = {'time': exectution time, 'ipcs': ipc-ways curve, 'mbws': membw-ways curve}
            if prog not in self.progToProfile:
                self.progToProfile[prog] = dict()
            # may be repeated by several concurrent profiling runs, only the first one is used
            # ?? or use the last one ??
            if scale not in self.progToProfile[prog]:
                wcnt = CFG.CLUSTER['llcway_per_node'] + 1
                ipcs = [0] * wcnt
                mbws = [0] * wcnt
                ret_cnt = [0] * wcnt
                # average of all daemons
                for ret in returns:
                    if 'ipcs' not in ret:
                        continue
                    for w in range(1, wcnt):
                        ipc, mbw = ret['ipcs'][w], ret['mbws'][w]
                        if ipc > 0 and mbw > 0:
                            ipcs[w] += ipc
                            mbws[w] += mbw
                            ret_cnt[w] += 1
                for w in range(1, wcnt):
                    ipcs[w] = int(10000 * ipcs[w] /
                                  ret_cnt[w]) / 10000 if ret_cnt[w] > 0 else -1
                    mbws[w] = int(10000 * mbws[w] /
                                  ret_cnt[w]) / 10000 if ret_cnt[w] > 0 else -1
                self.progToProfile[prog][scale] = {
                    'time': jobtime,
                    'ipcs': ipcs,
                    'mbws': mbws
                }
                # log to file
                with open(self.profileFilename, 'a') as fw:
                    fw.write(
                        json.dumps({
                            'prog': prog,
                            'scale': scale,
                            'value': self.progToProfile[prog][scale]
                        }))
                    fw.write('\n')
                self.logger.debug('profile:', self.progToProfile[prog][scale])
        # update other data structures
        self.cluster.resourceFree(self.jobidToResource[jobid])
        self.jobidToDaemons.pop(jobid)
        self.completedJobs.append(jobid)
        self.runningJobs.remove(jobid)

    # a message must be received from each daemon before the job is really completed.
    def daemonFinishJob(self, dae, jobid, jobreturns):
        self.jobidToDaemons[jobid].remove(dae)
        self.jobidToReturns[jobid].append(jobreturns)
        if len(self.jobidToDaemons[jobid]) == 0:
            self.jobFinish(jobid)

    def jobStuck(self, jobid):
        # decrease its priority stride
        self.jobidToPriority[jobid]['stride'] = CFG.DB['slow_stride']

    def mostPriorJob(self):
        # update the priority for all jobs
        now = self.getTimestampNow()
        for _, p in self.jobidToPriority.items():
            p['value'] += p['stride'] * (now - p['lastcheck'])
            p['lastcheck'] = now
        # sort pending jobs by their priority (highest first)
        self.pendingJobs.sort(
            key=lambda x: self.jobidToPriority[x]['value'] - x, reverse=True)
        return self.pendingJobs[0]

    # return the current profile of the program corresponding to jobid
    def getProfile(self, jobid):
        attr = self.jobidToJobattr[jobid]
        prog = attr['jobname']
        return (attr['parallelism'], attr['alpha'],
                self.progToProfile.get(prog, None))

    # find an allocation (None if not found)
    # scale and mode are recorded in the history; the NCWB values already imply them
    def allocateFor(self, jobid, N, C, W, B, scale, mode, toprofile):
        # some jobs cannot be scaled out
        if self.jobidToJobattr[jobid][
                'framework'] == 'TensorFlow':  # now we use only single node tf programs
            if scale != 1:
                return None
        # do not allow big jobs (more than half the machine) to spread
        if N > 32 and scale > 1 and N / scale > 0.5 * len(self.cluster.nodes):
            return None
        # try to allocate resource
        perNodeReq = {'C': C, 'W': W, 'B': B}
        resourceAllocation = self.cluster.search(N, perNodeReq)
        if resourceAllocation:
            self.jobidToResource[jobid] = resourceAllocation
            #self.logger.debug('Resource can be allocated for', jobid)
            alloc = []
            affinity = dict()
            for daemon, _, _ in resourceAllocation:
                affinity[self.cluster.nodes[daemon]
                         ['hostname']] = self.cluster.nodes[daemon]['core']
            nodelist = sorted(affinity.keys())
            leadnode = nodelist[0]
            for daemon, _, _ in resourceAllocation:
                jobspec = {
                    'jobid': jobid,
                    'jobattr': self.jobidToJobattr[jobid],
                    'coremap': self.cluster.nodes[daemon]['core'],
                    'llcwaymap': self.cluster.nodes[daemon]['llcway'],
                    'leadnode': leadnode,
                    'toprofile': toprofile
                }
                #if self.cluster.nodes[daemon]['hostname'] == leadnode:
                #jobspec['nodelist'] = nodelist
                jobspec['affinity'] = affinity
                alloc.append((daemon, jobspec))
            self.history[jobid]['allocation'] = alloc
            self.history[jobid]['nodelist'] = nodelist
            self.history[jobid]['NCWB'] = (N, C, W, B)
            self.history[jobid]['scale'] = scale
            self.history[jobid]['mode'] = mode
            self.history[jobid]['toprofile'] = toprofile
            return alloc
        else:
            #self.logger.warn('Cannot allocate resource for', jobid)
            return None
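
The priority handling in mostPriorJob() and jobStuck() is plain stride-based aging: every pending job accumulates priority over time, and a job that could not be allocated accumulates more slowly. Below is a self-contained sketch of the same idea; the stride values stand in for CFG.DB and are assumptions.

import time

DEFAULT_STRIDE = 1.0   # assumed CFG.DB['default_stride']
SLOW_STRIDE = 0.1      # assumed CFG.DB['slow_stride']

priority = {}          # jobid -> {'value', 'stride', 'lastcheck'}

def add_job(jobid, now):
    priority[jobid] = {'value': 0.0, 'stride': DEFAULT_STRIDE, 'lastcheck': now}

def job_stuck(jobid):
    # a job that cannot be allocated ages more slowly
    priority[jobid]['stride'] = SLOW_STRIDE

def most_prior_job(pending, now):
    for p in priority.values():
        p['value'] += p['stride'] * (now - p['lastcheck'])
        p['lastcheck'] = now
    # highest accumulated priority first, smaller jobid wins ties
    return max(pending, key=lambda j: (priority[j]['value'], -j))

# usage
t0 = time.time()
add_job(0, t0); add_job(1, t0)
job_stuck(0)
print(most_prior_job([0, 1], t0 + 10))   # -> 1, since the stuck job 0 ages slowly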
Code example #11
File: SSnetwork.py Project: thu-pacman/Uberun
class SSNetwork:
    def __init__(self, mode='worker'):
        self.logger = SSLogger('Network', info=False, echo=False)
        self.hostname = socket.gethostname()
        # mode: master or worker
        self.mode = mode
        # selector used for non-blocking IO
        self.sel = selectors.DefaultSelector()
        # connections, client -> connection
        self.connections = dict()
        # obj buffer for each connection, connection -> object list
        # buf[0] is the trailing string (without a '#' end flag)
        # buf[1] and after are completed commands
        self.objectBuffer = dict()
        # constant values
        self.EOC = CFG.NET['eoc']
        self.CONNECTION_BROKEN = CFG.NET['broken_conn_str']
        self.NEW_CONNECTION = CFG.NET['new_conn_str']
        self.SS_MASTER = socket.gethostbyname(CFG.NET['master_hostname']) 
        self.SS_PORT = CFG.NET['master_port']
        self.BACK_LOG = CFG.NET['master_backlog']

        # master and worker
        # master connects to all workers,
        # worker only connects to master, no inter-worker connections
        if mode == 'master': # master
            lsock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) # IPV4 and TCP
            lsock.bind(('', self.SS_PORT)) # accept from any
            lsock.listen(self.BACK_LOG)
            # use the selector for non-blocking IO; only check READ, assume always writable
            lsock.setblocking(False)
            self.sel.register(lsock, selectors.EVENT_READ, data=self.NEW_CONNECTION) 
            self.logger.info('Master started on %s' % socket.gethostname())
        else: # worker, both daemons and user frontends
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.setblocking(False)
            sock.connect_ex((self.SS_MASTER, self.SS_PORT))
            #data = types.SimpleNamespace(workerName=socket.gethostname())
            #workerName = socket.gethostname()
            self.sel.register(sock, selectors.EVENT_READ, data='master')
            self.connections['master'] = sock # only connects to master
            self.objectBuffer[sock] = ['']
            self.logger.info('Daemon started on %s' % socket.gethostname())
    
    # send an object to a destination
    # 1. use JSON to serialize the object to a string; only basic Python types are supported
    # 2. append EOC to the string, so that object strings can be separated on the remote side
    # 3. sendall: we don't send a string in multiple pieces; sendall is blocking but should work in our case
    def sendObjTo(self, destination, obj=None):
        #print('To Send >>', obj)
        wrapMsg = json.dumps(obj) + self.EOC
        self.connections[destination].sendall(wrapMsg.encode('utf-8'))

    # recv an object from anywhere
    # return value: (source, object received)
    # 1. pick an object from any buffer and return both the source and the object
    # 2. check for new connections and, if any, accept them
    # 3. check for new data from the network and, if any, buffer it
    def recvObj(self, timeout=1):
        # find a connection that has pending objects, and return the first one
        for client, conn in self.connections.items():
            buf = self.objectBuffer[conn]
            if len(buf) >= 2:
                obj = buf.pop(1)
                if obj == self.CONNECTION_BROKEN: # all the objects from a broken connection have been received
                    self.logger.info(client, 'lost connection')
                    assert(buf[0] == '') # there should be no trailing incomplete string
                    self.objectBuffer.pop(conn) # remove the entry for broken connection
                    self.connections.pop(client) # also remove the connection
                return (client, obj)
        sourcelist = []
        # check if something to read from socket
        events = self.sel.select(timeout=timeout)
        for key, mask in events:
            assert(mask & selectors.EVENT_READ)
            # new connection, only master should receive this
            if key.data is self.NEW_CONNECTION:
                assert(self.mode == 'master')
                sock = key.fileobj
                conn, addr = sock.accept() 
                conn.setblocking(False)
                #worker = socket.gethostbyaddr(addr[0])[0] # hostname of new connecting machine
                #print('accepted connection from', addr)
                self.sel.register(conn, selectors.EVENT_READ, data=addr) # use addr to distinguish clients
                # TODO, re-connect workers
                if addr in self.connections:
                    print('Currently we dont handle this case')
                    assert(False)
                self.connections[addr] = conn # record new connection
                self.objectBuffer[conn] = ['']
                # for new connection event, we don't need to receive data
            else:
                sourcelist.append(key.data) # append a source that has data here, key.data is 'addr' of client
        # now read data from all connections that have data
        for source in sourcelist:
            conn = self.connections[source]
            # entry should be added when connection built
            assert(conn in self.objectBuffer)
            buf = self.objectBuffer[conn]
            # receive whatever it can
            s = conn.recv(1024).decode('utf-8') # a string that may have <1, =1, >1 dumped objects
            if len(s) == 0: # connection broken
                # NOTE !!! DO NOT pop the connection from the buffer immediately since it may have unread objects
                # DON'T DO THIS: self.commandBuffer.pop(conn)
                # However, the connection can be unregistered
                self.sel.unregister(conn)
                # append the broken info to the object buffer
                buf.append(self.CONNECTION_BROKEN)
            else: # normal string
                ss = s.split(self.EOC) # split by EOC, each slice is an object or a piece of an object
                if len(ss) == 1: # incomplete
                    buf[0] = buf[0] + ss[0] # still incomplete
                else:
                    buf.append(json.loads(buf[0]+ss[0])) # the incomplete buf[0] now completed, de-serialized
                    for i in range(1, len(ss)-1): # middle slices must be complete
                        buf.append(json.loads(ss[i]))
                    buf[0] = ss[-1] # the tail may be incomplete
        return (None, None) # nothing
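
The wire format used by sendObjTo/recvObj is simply JSON objects concatenated with an end-of-command marker; the buffer comment above suggests '#' as that marker. The sketch below re-implements just the framing and incremental de-framing, independent of the socket handling; it is an illustration, not the project's code.

import json

EOC = '#'   # assumed end-of-command marker (see the buf[0] comment above)

def frame(obj):
    # what sendObjTo puts on the wire
    return (json.dumps(obj) + EOC).encode('utf-8')

def feed(buf, chunk):
    # buf[0] holds the trailing incomplete string, buf[1:] the completed objects,
    # mirroring how recvObj fills self.objectBuffer
    parts = chunk.decode('utf-8').split(EOC)
    if len(parts) == 1:
        buf[0] += parts[0]
        return
    buf.append(json.loads(buf[0] + parts[0]))
    for piece in parts[1:-1]:
        buf.append(json.loads(piece))
    buf[0] = parts[-1]

# usage: two objects arriving split across an arbitrary chunk boundary
buf = ['']
data = frame({'cmd': 'greeting'}) + frame({'cmd': 'newjob', 'jobid': 3})
feed(buf, data[:10])
feed(buf, data[10:])
print(buf[1:])   # [{'cmd': 'greeting'}, {'cmd': 'newjob', 'jobid': 3}]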
Code example #12
File: SSjobrunner.py Project: thu-pacman/Uberun
class SSJobRunner(threading.Thread):
    def __init__(self, hostname, jobspec, name='JobRunner'):
        super().__init__()
        self.jobspec = jobspec
        self.hostname = hostname
        self.logger = SSLogger(name)
        # results for parent
        self.returns = dict()
    # cores[i] = jobid, jobid uses this i-th core
    # ways[i] = jobid, jobid uses this i-th way
    # return ssh commands for CAT ('pqos -e' and 'pqos -a'), e.g.
    # sudo pqos -e "llc:1=0xffff0;llc:2=0x0000f"
    # sudo pqos -a "llc:1=0-7;llc:2=8-27" # bic03,bic04
    def getCATString(self, cores, ways):
        self.logger.debug('cores:', cores)
        self.logger.debug('ways:', ways)
        jobids = set(ways) # what jobs are explicitly using LLC
        if -1 in jobids:
            jobids.remove(-1) # -1 means no job
        if len(jobids) == 0: # no CAT, reset. For LLC-unaware policies like CE and CS
            return [['ssh', 'root@' + self.hostname, 'pqos -R']]
        # give the spare ways to jobs
        jobids = list(jobids)
        for i, jid in enumerate(ways):
            if jid == -1:
                ways[i] = jobids[i % len(jobids)]
        # sort the ways by jobid to make each COS use a contiguous range
        ways.sort()
        # make CAT decision
        cos = []
        for jobid in jobids:
            cos.append({'cores': [], 'ways': []})
            for c, jid in enumerate(cores):
                if jobid == jid:
                    cos[-1]['cores'].append(c)
            for w, jid in enumerate(ways):
                if jobid == jid:
                    cos[-1]['ways'].append(w)
        pqosE = []
        pqosA = []
        for i, c in enumerate(cos):
            s = hex(int(''.join(['1' if x in c['ways'] else '0' for x in range(19,-1,-1)]), 2))
            pqosE.append('llc:%d=%s' % (i+1, s))
            pqosA.append('llc:%d=%s' % (i+1, ','.join(str(x) for x in c['cores'])))
        pqosEcmd = 'pqos -e "%s"' % (';'.join(pqosE))
        pqosAcmd = 'pqos -a "%s"' % (';'.join(pqosA))
        return [['ssh', 'root@' + self.hostname, pqosEcmd], ['ssh', 'root@' + self.hostname, pqosAcmd]]

    # the command used to launch the program
    def getLaunchString(self, jobspec):
        fm = jobspec['jobattr']['framework']
        if fm == 'MPI' or fm == 'Spark':
            if jobspec['leadnode'] != self.hostname:
                return (None, None)
        elif fm == 'TensorFlow':
            assert(len(jobspec['affinity']) == 1)
            pass

        affs = dict()
        for host, corelist in jobspec['affinity'].items():
            affs[host] = []
            for c, jid in enumerate(corelist):
                if jobspec['jobid'] == jid:
                    affs[host].append(str(c))

        envs = dict()
        # running commands
        # format prog-nproc, e.g. mg-16, bfs-32
        prog, nproc = jobspec['jobattr']['jobname'].split('-')
        assert(int(nproc) == jobspec['jobattr']['parallelism'])
        prog = prog.lower()

        # env vars for MPI on bic
        if fm == 'MPI':
            envs['I_MPI_SHM_LMT'] = 'shm'
            envs['I_MPI_DAPL_PROVIDER'] = 'ofa-v2-ib0'
        # env vars for Spark on bic
        if prog in ['mg', 'lu', 'ep', 'cg']: # four NPB programs
            exePath = '%s/%s.D.%s' % (CFG.RUN['exe_path']['npb'], prog, nproc)
            # mpirun -host bic05 -env I_MPI_PIN_PROCESSOR_LIST=1,2,3,4,5,6,7,9 -n 8 ./mg.D.16 : -host bic06 -env I_MPI_PIN_PROCESSOR_LIST=15,16,17,18,19,20,21,22 -n 8 ./mg.D.16
            exeCmd = ['mpirun']
            for host, corelist in affs.items():
                exeCmd.extend(['-host', host, '-env', 'I_MPI_PIN_PROCESSOR_LIST=%s' % ','.join(corelist), '-n', str(len(corelist)), exePath, ':'])
            exeCmd.pop(-1)
        elif prog in ['ts', 'wc', 'nw']: # three spark programs
            spark_master = str(int(jobspec['leadnode'][-2:]))
            exePath = '%s/%s.sh' % (CFG.RUN['exe_path']['spark'], prog)
            exeCmd = [exePath, spark_master]
            for host, corelist in affs.items():
                exeCmd.extend([host, str(len(corelist)), ','.join(corelist)])
            #self.logger.warn(exeCmd)
            self.logger.warn('%s: %s' % (prog, ' '.join(exeCmd)))
        elif prog in ['bfs']: # graph program
            exePath = CFG.RUN['exe_path'][prog]
            exeCmd = ['mpirun']
            for host, corelist in affs.items():
                exeCmd.extend(['-host', host, '-env', 'I_MPI_PIN_PROCESSOR_LIST=%s' % ','.join(corelist), '-n', str(len(corelist)), exePath, '24', '16', ':'])
            exeCmd.pop(-1)
        elif prog in ['hc', 'bw']: # two speccpu programs
            exePath = CFG.RUN['exe_path'][prog]
            exeCmd = ['mpirun']
            for host, corelist in affs.items():
                exeCmd.extend(['-host', host, '-env', 'I_MPI_PIN_PROCESSOR_LIST=%s' % ','.join(corelist), '-n', str(len(corelist)), exePath, ':'])
            exeCmd.pop(-1)
            # TODO, wrap the jobs and move this cd operation to wrapper scripts
            os.chdir(CFG.RUN['exe_dir'][prog])
        elif prog in ['gan', 'rnn']: # two tensorflow programs
            exePath = CFG.RUN['exe_path'][prog]
            assert(len(affs) == 1) # should run on only one node
            for host, corelist in affs.items():
                exeCmd = [exePath, str(len(corelist)), ','.join(corelist)]
            #self.logger.warn(' '.join(exeCmd))
        else:
            assert(False)

        return (envs, exeCmd)
    
    def getProfileString(self, jobspec):
        if not jobspec['toprofile'] or jobspec['leadnode'] != self.hostname:
            return None
        return [CFG.RUN['deploy_path'] + 'SSmonitor.py'] 

    def run(self):
        #self.logger.warn(self.jobspec)
        jobname = self.jobspec['jobattr']['jobname']
        self.logger.debug('Run:', jobname)
        # CAT configuration
        catCmds = self.getCATString(self.jobspec['coremap'], self.jobspec['llcwaymap'])
        for catCmd in catCmds:
            #self.logger.warn('CAT CMD:', ' '.join(catCmd))
            #subprocess.run(catCmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            subprocess.run(catCmd, stdout=subprocess.DEVNULL)
        # start the profiler (if needed)
        profCmd = self.getProfileString(self.jobspec)
        if profCmd:
            self.logger.debug('PROF CMD:', ' '.join(profCmd))
            #pPorfiler = subprocess.Popen(profCmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
            pPorfiler = subprocess.Popen(profCmd, stdout=subprocess.PIPE)
        # run the executable
        envs, exeCmd = self.getLaunchString(self.jobspec)
        if envs:
            for k, v in envs.items():
                os.environ[k] = v
        if exeCmd:
            self.logger.debug('EXE CMD:', ' '.join(exeCmd))
            pRun = subprocess.run(exeCmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            #pRun = subprocess.run(exeCmd, stdout=subprocess.DEVNULL)#, stderr=subprocess.DEVNULL)
            self.returns['exitcode'] = pRun.returncode
            self.logger.debug('EXE Done:', exeCmd)
            # back to deploy path
            os.chdir(CFG.RUN['deploy_path'])
        # terminate the profiler, sort out the results, and return them to the daemon to be sent to the master
        if profCmd:
            self.logger.debug('check profile results')
            pPorfiler.terminate()
            # llcway ipc mbw
            ipcs, mbws = [], []
            for _ in range(0, 1+CFG.CLUSTER['llcway_per_node']):
                ipcs.append([])
                mbws.append([])
            for line in pPorfiler.stdout:
                ss = line.decode('utf-8').strip().split()
                #self.logger.debug(line)
                if len(ss) != 3:
                    break
                w, ipc, mbw = int(ss[0]), float(ss[1]), float(ss[2])
                ipcs[w].append(ipc)
                mbws[w].append(mbw)
            for w in range(0, 1+CFG.CLUSTER['llcway_per_node']):
                ipcs[w] = numpy.average(ipcs[w]) if len(ipcs[w]) else -1
                mbws[w] = numpy.average(mbws[w]) if len(mbws[w]) else -1
            # linear interpolation
            # self.logger.warn(CFG.PROF['sample_ways'])
            for i in range(0, len(CFG.PROF['sample_ways'])-1):
                cur_w, next_w = CFG.PROF['sample_ways'][i], CFG.PROF['sample_ways'][i+1]
                #self.logger.warn('cur_w %d, next_w %d' % (cur_w, next_w))
                for k in range(min(cur_w, next_w) + 1, max(cur_w, next_w)):
                    #self.logger.warn(k, ipcs)
                    ipcs[k] = ipcs[next_w] + (ipcs[cur_w]-ipcs[next_w])/(cur_w-next_w) * (k-next_w)
                    mbws[k] = mbws[next_w] + (mbws[cur_w]-mbws[next_w])/(cur_w-next_w) * (k-next_w)
            self.logger.warn(ipcs)
                    
            self.returns['ipcs'] = ipcs
            self.returns['mbws'] = mbws
        self.logger.echo('RETURNS', self.returns)
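
The CAT masks built in getCATString() are plain per-COS way bitmasks over a 20-way LLC (bit i set means way i belongs to that class of service). Below is a stand-alone sketch of the mask construction; the way assignment in the usage lines is made up.

# Sketch of the way-bitmask construction used in getCATString (20-way LLC).
def way_mask(ways, total_ways=20):
    bits = ''.join('1' if w in ways else '0' for w in range(total_ways - 1, -1, -1))
    return hex(int(bits, 2))

# illustrative assignment: job A gets ways 0-7, job B gets ways 8-19
print(way_mask(range(0, 8)))     # 0xff
print(way_mask(range(8, 20)))    # 0xfff00
# the masks end up in a 'pqos -e "llc:1=...;llc:2=..."' command, while the matching
# 'pqos -a' command maps each COS to its core list, as in the example comment above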
Code example #13
class SSMaster:
    def __init__(self, algoname='CE', alpha=0.9):
        self.MIN_DAEMONS = 8
        self.net = SSMasterNetwork()
        self.db = SSDatabase(algorithm=algoname)
        self.sched = SSScheduler(algoname=algoname, database=self.db)
        self.default_alpha = alpha
        self.prtl = SSProtocol()
        self.logger = SSLogger('Master')
        self.parser = SSParser()
        self.users = []
        self.daemons = []
    
    def isclean(self):
        if len(self.db.pendingJobs) or len(self.db.runningJobs):
            return False
        else:
            return True

    def addJobSequence(self, jobstring):
        for n in jobstring.split(','):
            n = n.strip()
            fm = None
            exe = n.split('-')[0]
            if exe in ['gan', 'rnn']:
                fm = 'TensorFlow'
            elif exe in ['ts', 'nw', 'wc']:
                fm = 'Spark'
            else:
                fm = 'MPI'
            self.db.addUserJob({'jobname': n, 'framework': fm, 'parallelism': int(n[-2:]), 'alpha': self.default_alpha})

    def parse(self):
        self.parser.addRecords(self.parser.loadHistory(self.db.history))
        return self.parser.getBasicStats(self.parser.selectRecords())
    
    # the main loop
    def run(self):
        # try to get new message
        client, msg = self.net.recvObj(timeout=1)
        if client: # acts accordingly
            # connection broken
            if msg == self.net.CONNECTION_BROKEN: # client lost
                if client in self.users:
                    self.users.remove(client)
                if client in self.daemons:
                    self.logger.error('No handle for daemon lost !!')
                    self.daemons.remove(client)
                    #TODO database remove, scheduler reschedule
            # normal messages
            #self.logger.echo(msg)
            if self.prtl.isgreeting(msg): # new client
                if msg['role'] == 'user':
                    self.logger.debug('New User from', client)
                    self.users.append(client) # user for interaction
                elif msg['role'] == 'daemon':
                    self.logger.debug('New Daemon from', client)
                    self.daemons.append(client) # daemon running on each node
                    self.db.addDaemon(client, msg['hostname'])
            elif self.prtl.isjobfinish(msg):
                # NOTE: only one daemon of the job has finished; all must finish before the job really finishes
                self.db.daemonFinishJob(client, msg['jobid'], msg['returns']) 
        # wait for all daemons 
        if len(self.daemons) < self.MIN_DAEMONS:
            return
        # try to schedule jobs, ignore the estimate time
        allocation, _ = self.sched.nextJob()
        #self.logger.debug(allocation)
        if allocation:
            for daemon, jobspec in allocation:
                #self.logger.echo(daemon, jobspec)
                self.net.sendObjTo(daemon, self.prtl.newjob(jobspec))
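
addJobSequence() infers the framework from the program prefix and the parallelism from the last two characters of the job name. Below is a self-contained illustration of that mapping; the job string is made up.

# Illustration of how addJobSequence turns a job string into job attributes.
def job_attrs(jobstring, alpha=0.9):
    attrs = []
    for n in (s.strip() for s in jobstring.split(',')):
        exe = n.split('-')[0]
        if exe in ['gan', 'rnn']:
            fm = 'TensorFlow'
        elif exe in ['ts', 'nw', 'wc']:
            fm = 'Spark'
        else:
            fm = 'MPI'
        attrs.append({'jobname': n, 'framework': fm,
                      'parallelism': int(n[-2:]), 'alpha': alpha})
    return attrs

print(job_attrs('mg-16, ts-32, gan-16'))
# [{'jobname': 'mg-16', 'framework': 'MPI', 'parallelism': 16, 'alpha': 0.9},
#  {'jobname': 'ts-32', 'framework': 'Spark', 'parallelism': 32, 'alpha': 0.9},
#  {'jobname': 'gan-16', 'framework': 'TensorFlow', 'parallelism': 16, 'alpha': 0.9}]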
Code example #14
class SSSimulator:
    def __init__(self, alg='CE'):
        self.MIN_DAEMONS = 1
        self.clock = SimulationClock()
        self.db = SSDatabase(algorithm=alg,
                             simulationClock=self.clock,
                             logToFile=False)
        self.sched = SSScheduler(algoname=alg, database=self.db)
        self.logger = SSLogger('Simulator')
        self.parser = SSParser()
        self.users = []
        self.daemons = []
        self.trace = []
        self.pendingJobs = dict()
        self.runningJobs = dict()

    def isclean(self):
        if len(self.db.pendingJobs) or len(self.db.runningJobs) or len(
                self.trace):
            return False
        else:
            return True

    def addTrace(self, trace):
        self.trace.extend(trace)
        self.trace.sort(key=lambda x: x[2])

    def loadTrace(self, fname):
        with open(fname, 'r') as fr:
            for line in fr.readlines():
                program, nproc, submittime, duration = line.strip().split(',')
                self.trace.append(
                    (program, int(nproc), float(submittime), float(duration)))
        self.trace.sort(key=lambda x: x[2])
        jobs = ', '.join([x[0] for x in self.trace])
        self.logger.info('Job trace: ', jobs)
        #self.logger.echo(self.trace)

    def addFakeDeamons(self, prefix, cnt):
        for i in range(0, cnt):
            fakeDeamon = prefix + str(i)
            self.daemons.append(fakeDeamon)
            self.db.addDaemon(fakeDeamon, fakeDeamon)
        #self.logger.info('Daemons:', self.daemons)

    # the main loop
    def run(self, alpha=0.9):
        done_cnt = 0
        next_time = [x[2] for x in self.trace]
        heapq.heapify(next_time)
        while not self.isclean():
            # jobs arriving at the current time
            # trace is already sorted by submit time
            while len(self.trace) and self.trace[0][2] <= self.clock.now():
                fm = None
                n = self.trace[0][0]
                exe = n.split('-')[0]
                if exe in ['gan', 'rnn']:
                    fm = 'TensorFlow'
                elif exe in ['ts', 'nw', 'wc']:
                    fm = 'Spark'
                else:
                    fm = 'MPI'
                jobid = self.db.addUserJob({
                    'jobname': n,
                    'framework': fm,
                    'parallelism': self.trace[0][1],
                    'alpha': alpha
                })
                self.pendingJobs[jobid] = self.trace[0]
                self.trace.pop(0)
                #print('put jid %d as pending' % jobid)

            # start new jobs if possible at the current time
            # try to schedule jobs and get their estimated runtime

            while True:
                allocation, est = self.sched.nextJob()
                self.logger.debug(allocation)
                if allocation:
                    if est is None:
                        print(allocation)
                    assert (est)
                    daemons = []
                    for daemon, jobspec in allocation:
                        jobid = jobspec['jobid']
                        daemons.append(daemon)
                    # compute the duration and finish time
                    jt = self.pendingJobs[jobid]
                    # if the trace gives no duration, use the estimated time; otherwise scale the trace duration by the estimated speedup
                    est_time = est[0] if jt[3] == 0 else jt[3] * est[1]
                    et = self.clock.now() + est_time
                    # runningjob[id] = (finish time, daemons)
                    self.runningJobs[jobid] = (et, list(daemons))
                    heapq.heappush(next_time, et + 1)
                else:
                    break

            # finish jobs whose end time has been reached
            flag = True
            while flag:
                flag = False
                for jobid, v in self.runningJobs.items():
                    # job finish
                    if v[0] <= self.clock.now():
                        for daemon in v[1]:
                            self.db.daemonFinishJob(daemon, jobid,
                                                    {'exitcode': 0})
                        self.runningJobs.pop(jobid)
                        done_cnt += 1
                        if done_cnt % 500 == 0:
                            print('Simulation done for %d jobs' % done_cnt)
                            pass
                        flag = True
                        break

            if len(next_time):
                self.clock.ticksto(heapq.heappop(next_time))
            else:
                self.clock.tick()

    def parse(self):
        self.parser.addRecords(self.parser.loadHistory(self.db.history))
        return self.parser.getBasicStats(self.parser.selectRecords())

    def show(self):
        self.parser.showSchedFig(self.parser.selectRecords())
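
loadTrace() expects one job per line in program,nproc,submittime,duration form; per run(), a duration of 0 makes the simulator fall back to the estimated time. Below is a small illustration of preparing and loading such a trace; the file name and trace contents are made up.

# Illustrative trace file in the format loadTrace() expects.
trace_lines = """\
mg-16,16,0.0,120.5
ts-32,32,5.0,0
gan-16,16,12.0,300.0
"""
with open('example_trace.csv', 'w') as fw:   # hypothetical file name
    fw.write(trace_lines)

# a possible driver, assuming the classes above are importable:
# sim = SSSimulator(alg='CE')
# sim.addFakeDeamons('node', 8)
# sim.loadTrace('example_trace.csv')
# sim.run(alpha=0.9)
# print(sim.parse())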