def clusterPopulation(self):
    """Clusters the given population into k clusters using balanced k-leader-means clustering.

    Uses ``self.amountOfClusters`` (k) leaders chosen by farthest-point selection,
    runs k-means seeded with those leaders, then balances each cluster by appending
    the c nearest solutions (measured in fitness space) to it.
    Result is stored in ``self.clusters``; nothing is returned.
    """
    # TODO: POSSIBLE CHANGE = CALCULATE OPTIMAL k VALUE
    # The first leader is the solution with maximum value in an arbitrary objective.
    leaders = [self.population[0]]
    for solution in self.population:
        if solution.fitness[0] > leaders[0].fitness[0]:
            leaders[0] = solution
    # The solution with the largest nearest-leader distance is chosen as the next leader,
    # repeated k - 1 times to obtain k leaders.
    for _ in range(self.amountOfClusters - 1):
        # Maps each non-leader solution to its distance to the closest current leader.
        nearestLeaderDistance = {}
        for solution in self.population:
            if solution not in leaders:
                # Seed with the distance to the first leader, then tighten over all leaders.
                nearestLeaderDistance[solution] = util.euclidianDistance(
                    solution.fitness, leaders[0].fitness)
                for leader in leaders:
                    leaderDistance = util.euclidianDistance(
                        solution.fitness, leader.fitness)
                    if leaderDistance < nearestLeaderDistance[solution]:
                        nearestLeaderDistance[solution] = leaderDistance
        # Farthest-point heuristic: the solution whose nearest leader is farthest
        # becomes the next leader.
        leader = max(nearestLeaderDistance, key=nearestLeaderDistance.get)
        leaders.append(leader)
    # k-means clustering is performed with k leaders as the initial cluster means.
    clusters = []
    for leader in leaders:
        mean = leader.fitness
        cluster = Cluster(mean, self.problem)
        clusters.append(cluster)
    # Perform k-means clustering until all clusters are unchanged.
    # NOTE(review): loop termination relies on Cluster.computeMean() updating the
    # `changed` flag — confirm against the Cluster implementation.
    while True in [cluster.changed for cluster in clusters]:
        # Assign every solution to the cluster with the nearest mean (in fitness space).
        for solution in self.population:
            nearestCluster = clusters[0]
            nearestClusterDistance = util.euclidianDistance(
                solution.fitness, nearestCluster.mean)
            for cluster in clusters:
                clusterDistance = util.euclidianDistance(
                    solution.fitness, cluster.mean)
                if clusterDistance < nearestClusterDistance:
                    nearestCluster = cluster
                    nearestClusterDistance = clusterDistance
            nearestCluster.append(solution)
        # Recompute each mean from its members, then empty the cluster for the next pass.
        # NOTE(review): presumably clear() keeps the updated mean and only drops the
        # membership list — verify against Cluster.
        for cluster in clusters:
            cluster.computeMean()
            cluster.clear()
    # Expand the clusters with the closest c solutions.
    # NOTE(review): relies on true division (Python 3); under Python 2 semantics
    # 2 / amountOfClusters would truncate to 0 for k > 2 — confirm interpreter version.
    c = int(2 / self.amountOfClusters * self.populationSize)
    for cluster in clusters:
        # Distance from every solution in the population to this cluster's mean.
        distance = {}
        for solution in self.population:
            distance[solution] = util.euclidianDistance(
                solution.fitness, cluster.mean)
        # Greedily take the c closest solutions (a solution may end up in
        # several clusters, which keeps cluster sizes balanced).
        for _ in range(c):
            if len(distance) > 0:
                solution = min(distance, key=distance.get)
                del distance[solution]
                cluster.append(solution)
    self.clusters = clusters
def k_means(points, min_x, max_x, min_y, max_y, min_z, max_z):
    """Runs 3-cluster k-means over 3-D ``points`` with random initial centers.

    Cluster centers are drawn uniformly inside the given axis-aligned bounding
    box; clusters are re-randomized whenever an assignment pass leaves any
    cluster empty. Iterates (assign -> centralize -> check) until no point
    changes cluster, printing membership and drawing via ``utl.draw`` along
    the way. Mutates nothing outside the three local clusters; returns None.
    """
    # Three random starting positions inside the bounding box.
    pos1 = utl.randomPosition(min_x, min_y, min_z, max_x, max_y, max_z)
    pos2 = utl.randomPosition(min_x, min_y, min_z, max_x, max_y, max_z)
    pos3 = utl.randomPosition(min_x, min_y, min_z, max_x, max_y, max_z)
    # One cluster per color channel used by the drawing helper.
    cluster1 = Cluster(pos1, 'r')
    cluster2 = Cluster(pos2, 'g')
    cluster3 = Cluster(pos3, 'b')
    changed = True
    while(changed):
        changed = False
        # Re-seed and re-assign until every cluster has at least one point.
        while(cluster1.isEmpty() or cluster2.isEmpty() or cluster3.isEmpty()):
            cluster1.clear()
            cluster2.clear()
            cluster3.clear()
            for point in points:
                cluster = utl.closerCluster3D(cluster1, cluster2, cluster3, point)
                cluster.addPoint(point)
            # NOTE(review): positions are re-randomized unconditionally after each
            # assignment pass here (module-level randomPosition, not utl.*) —
            # verify this ordering is intended and that randomPosition mutates
            # the cluster's center in place.
            randomPosition(cluster1, min_x, max_x, min_y, max_y, min_z, max_z)
            randomPosition(cluster2, min_x, max_x, min_y, max_y, min_z, max_z)
            randomPosition(cluster3, min_x, max_x, min_y, max_y, min_z, max_z)
            utl.draw(cluster1, cluster2, cluster3)
        # Move each center to the mean of its members, then redraw.
        cluster1.centralize(); cluster2.centralize(); cluster3.centralize();
        utl.draw(cluster1, cluster2, cluster3)
        # If any point now prefers a different cluster, reset memberships and
        # run another full iteration.
        for point in points:
            cluster = utl.closerCluster3D(cluster1, cluster2, cluster3, point)
            print("after center")
            if(not cluster.hasPoint(point)):
                print("changed")
                changed = True
                cluster1.clear()
                cluster2.clear()
                cluster3.clear()
                break;
    # Converged: dump final membership of each cluster.
    print("cluster 1:")
    for point in cluster1.points:
        print("x: ", point.x, " y: ", point.y, " z: ", point.z)
    print("cluster 2:")
    for point in cluster2.points:
        print("x: ", point.x, " y: ", point.y, " z: ", point.z)
    print("cluster 3:")
    for point in cluster3.points:
        print("x: ", point.x, " y: ", point.y, " z: ", point.z)
    utl.draw(cluster1, cluster2, cluster3)
class Scheduler(object):
    """Base class for timeslot-driven cluster job schedulers.

    Drives a simulated ``Cluster`` through discrete timeslots: each ``step()``
    prepares arriving jobs, calls the child-class ``_schedule()`` policy, and
    progresses running jobs. Collects (state, action-label) pairs in
    ``self.data`` and per-timeslot rewards in ``self.rewards``.
    Configuration comes from the project-level ``pm`` module.
    """

    def __init__(self, name, trace, logger):
        """name: scheduler identifier; trace: dict mapping timeslot -> arriving jobs;
        logger: optional preconfigured logger (one is created from `name` if None)."""
        self.name = name  # e.g., 'DRF'
        self.trace = trace
        if logger is None:
            assert name
            self.logger = log.getLogger(name=name, fh=False)
        else:
            self.logger = logger
        self.cluster = Cluster(self.logger)
        self.curr_ts = 0          # current timeslot index
        self.end = False          # True once all jobs in the trace completed
        self.running_jobs = set()
        self.uncompleted_jobs = set()
        self.completed_jobs = set()
        self.data = None  # all state action pairs in one ts
        self.rewards = []

    def step(self):
        """Advance the simulation by one timeslot; returns the (state, label)
        pairs collected during this slot."""
        # step by one timeslot
        assert not self.end
        self._prepare()
        self._schedule()
        self._progress()
        if len(self.completed_jobs) == pm.TOT_NUM_JOBS:
            self.end = True
        self.curr_ts += 1
        return self.data

    def get_results(self):
        """Return (num completed jobs, average JCT, makespan, average reward).

        JCT counts both endpoints (+1.0). Asserts at least one job completed.
        """
        # get final results, including avg jct, makespan and avg reward
        jct_list = [(job.end_time - job.arrv_time + 1.0)
                    for job in self.completed_jobs]
        makespan = max([job.end_time + 1.0 for job in self.completed_jobs])
        assert jct_list
        return (len(self.completed_jobs),
                1.0 * sum(jct_list) / len(jct_list), makespan,
                sum(self.rewards) / len(self.rewards))

    def get_job_jcts(self):
        """Return a dict mapping completed job id -> its job completion time."""
        jcts = dict()
        for job in self.completed_jobs:
            jcts[job.id] = job.end_time - job.arrv_time + 1.0
        return jcts

    def _prepare(self):
        """Reset per-timeslot state: clear the cluster, admit jobs arriving at
        this timeslot, zero all resource assignments, and rebuild the
        least-loaded-node priority queue."""
        self.cluster.clear()
        self.data = []
        self.running_jobs.clear()
        if self.curr_ts in self.trace:
            for job in self.trace[self.curr_ts]:
                job.reset()  # must reset since it is trained for multiple epochs
                self.uncompleted_jobs.add(job)
                self.logger.debug(job.info())
        # Resource assignments are recomputed from scratch every timeslot.
        for job in self.uncompleted_jobs:
            job.num_workers = 0
            job.curr_worker_placement = []
            if pm.PS_WORKER:
                job.num_ps = 0
                job.curr_ps_placement = []
        # sort based on used resources from smallest to largest for load balancing
        self.node_used_resr_queue = Queue.PriorityQueue()
        for i in range(pm.CLUSTER_NUM_NODES):
            self.node_used_resr_queue.put((0, i))

    def _schedule(self):
        """Scheduling policy hook; child classes must override."""
        self.logger.info("This method is to be implemented on child class!")

    def _progress(self):
        """Step every running job one timeslot; move finished jobs to
        ``completed_jobs`` and append this slot's reward."""
        reward = 0
        # Iterate a copy: finishing a job mutates the underlying sets.
        for job in self.running_jobs.copy():
            epoch = job.step()
            reward += epoch / job.num_epochs
            if job.progress >= job.real_num_epochs:
                job.end_time = self.curr_ts
                # self.running_jobs.remove(job)
                self.uncompleted_jobs.remove(job)
                self.completed_jobs.add(job)
        # Optionally override the reward with the backlog size.
        if pm.NUM_UNCOMPLETED_JOB_REWARD:
            reward = len(self.uncompleted_jobs)
        self.rewards.append(reward)

    def observe(self):
        '''
        existing resource share of each job: 0-1
        job type 0-8
        job normalized progress 0-1
        num of backlogs: percentage of total number of jobs in the trace

        Builds and returns the NN input state (shape pm.STATE_DIM) for the top
        pm.SCHED_WINDOW_SIZE schedulable jobs, ordered by pm.JOB_SORT_PRIORITY.
        Also fills self.window_jobs with the job shown in each window slot.
        '''
        # cluster_state = self.cluster.get_cluster_state()
        # for test, first use dominant resource share of each job as input state
        q = Queue.PriorityQueue()
        for job in self.uncompleted_jobs:
            # Skip jobs that already hit their worker (and ps) caps.
            if pm.PS_WORKER:
                if job.num_workers >= pm.MAX_NUM_WORKERS and job.num_ps >= pm.MAX_NUM_WORKERS:  # and, not or
                    continue
            else:
                if job.num_workers >= pm.MAX_NUM_WORKERS:  # not schedule it any more
                    continue
            # Second tuple element (arrv_time) breaks priority ties deterministically.
            if pm.JOB_SORT_PRIORITY == "Resource":
                q.put((job.dom_share, job.arrv_time, job))
            elif pm.JOB_SORT_PRIORITY == "Arrival":
                q.put((job.arrv_time, job.arrv_time, job))
            elif pm.JOB_SORT_PRIORITY == "Progress":
                q.put((1 - job.progress / job.num_epochs, job.arrv_time, job))
        if pm.ZERO_PADDING:
            state = np.zeros(shape=pm.STATE_DIM)  # zero padding instead of -1
        else:
            state = -1 * np.ones(shape=pm.STATE_DIM)
        self.window_jobs = [None for _ in range(pm.SCHED_WINDOW_SIZE)]
        shuffle = np.array([i for i in range(pm.SCHED_WINDOW_SIZE)
                            ])  # default keep order
        if pm.JOB_ORDER_SHUFFLE:
            shuffle = np.random.choice(pm.SCHED_WINDOW_SIZE,
                                       pm.SCHED_WINDOW_SIZE,
                                       replace=False)
        # resource share / job arrival / progress
        for order in shuffle:
            if not q.empty():
                _, _, job = q.get()
                j = 0  # feature-row index within the state matrix
                for (
                        input, enable
                ) in pm.INPUTS_GATE:  # INPUTS_GATE=[("TYPE",True), ("STAY",False), ("PROGRESS",False), ("DOM_RESR",False), ("WORKERS",True)]
                    if enable:
                        if input == "TYPE":
                            if not pm.INPUT_RESCALE:
                                if not pm.TYPE_BINARY:
                                    state[j][order] = job.type
                                else:
                                    # Encode the type as 4 binary feature rows;
                                    # the trailing j -= 1 compensates for the
                                    # shared j += 1 after this branch.
                                    bin_str = "{0:b}".format(job.type).zfill(4)
                                    for bin_ch in bin_str:
                                        state[j][order] = int(bin_ch)
                                        j += 1
                                    j -= 1
                            else:
                                state[j][order] = float(job.type) / 8
                        elif input == "STAY":
                            # Time spent in the system since arrival.
                            if not pm.INPUT_RESCALE:
                                state[j][order] = self.curr_ts - job.arrv_time
                            else:
                                state[j][order] = float(self.curr_ts -
                                                        job.arrv_time) / 100
                        elif input == "PROGRESS":
                            state[j][order] = 1 - job.progress / job.num_epochs
                        elif input == "DOM_RESR":
                            state[j][order] = job.dom_share
                        elif input == "WORKERS":
                            if not pm.INPUT_RESCALE:
                                state[j][order] = job.num_workers
                            else:
                                state[j][order] = float(
                                    job.num_workers) / pm.MAX_NUM_WORKERS
                        elif input == "PS":
                            if not pm.INPUT_RESCALE:
                                state[j][order] = job.num_ps
                            else:
                                state[j][order] = float(
                                    job.num_ps) / pm.MAX_NUM_WORKERS
                        else:
                            raise RuntimeError
                        j += 1
                self.window_jobs[order] = job
        # backlog = float(max(len(self.uncompleted_jobs) - pm.SCHED_WINDOW_SIZE, 0))/len(pm.TOT_NUM_JOBS)
        self.logger.debug("ts: " + str(self.curr_ts) \
            + " backlog: " + str(max(len(self.uncompleted_jobs) - pm.SCHED_WINDOW_SIZE, 0)) \
            + " completed jobs: " + str(len(self.completed_jobs)) \
            + " uncompleted jobs: " + str(len(self.uncompleted_jobs)))
        return state

    def _state(
            self,
            label_job_id,
            role="worker"
    ):  # whether this action selection leads to worker increment or ps increment
        """Record one (state, one-hot action label) training pair in self.data.

        The label index encodes which window slot holds `label_job_id` and,
        when pm.PS_WORKER is set, which role (worker/ps/bundle) was selected.
        """
        # cluster_state = self.cluster.get_cluster_state()
        input = self.observe()  # NN input
        label = np.zeros(pm.ACTION_DIM)
        for i in range(pm.SCHED_WINDOW_SIZE):
            job = self.window_jobs[i]
            if job and job.id == label_job_id:
                if pm.PS_WORKER:
                    if pm.BUNDLE_ACTION:
                        # 3 actions per window slot: worker / ps / bundle.
                        if role == "worker":
                            label[i * 3] = 1
                        elif role == "ps":
                            label[i * 3 + 1] = 1
                        elif role == "bundle":
                            label[i * 3 + 2] = 1
                    else:
                        # 2 actions per window slot: worker / ps.
                        if role == "worker":
                            label[i * 2] = 1
                        elif role == "ps":
                            label[i * 2 + 1] = 1
                else:
                    # 1 action per window slot.
                    label[i] = 1
        self.data.append((input, label))