def launch():
    """Run the heuristic agent over all pre-generated workload examples,
    visualizing each state and printing per-example actions and rewards.

    NOTE(review): relies on module-level names (Heuristic_Agents, Env,
    Task_Dist, visualize_state) defined elsewhere in this project.
    """
    agent = Heuristic_Agents()
    env = Env(0, 1)
    task_dist = Task_Dist()
    workloads = task_dist.gen_seq_workload()
    for ex in range(agent.params.num_ex):
        env.reset()
        env.workload_seq = workloads[ex]
        # Pre-load the entire workload sequence into the backlog up front.
        for i in range(len(workloads[ex])):
            env.generate_workload()
            env.seq_id += 1
        print('Testing : ', env.workload_seq)
        ob = env.observe()
        rews = []
        acts = []
        # BUGFIX: finished_episode_len was unbound when no allocation ever
        # succeeded, crashing the summary slice below; initialize it.
        finished_episode_len = 0
        for _ in range(agent.params.episode_max_length):
            a = agent.get_action(env)
            # BUGFIX: actions were never recorded, so the 'Test Actions'
            # line always printed an empty list.
            acts.append(a)
            plt1 = visualize_state(ob, agent.params,
                                   '/tmp/trajs/episode_%d' % int(_))
            ob, rews, done, status = env.step(a, _, rews)
            if status == 'Allocation_Success':
                finished_episode_len = _ + 1
            if done:
                break
        print('Test Actions: ', acts[:finished_episode_len])
        print('Reward : ', rews)
        print('Reward : ', sum(rews))
def __init__(self, cur_time, time_step):
    """Initialize an empty cluster: parameters, clock, machine pool,
    backlog, and workload-sequence bookkeeping."""
    self.params = Parameters()
    self.cur_time = cur_time
    self.time_step = time_step
    self.machines = []
    self.waiting_tasks = []
    # Build the fixed pool of machines, each with the configured capacity.
    for machine_id in range(self.params.num_machines):
        machine = Machine(machine_id, self.params.machine_res_cap)
        self.machines.append(machine)
    self.task_dist = Task_Dist()
    # Workload sequence is injected by the caller before use.
    self.workload_seq = None
    self.seq_id = 0
def main():
    """Entry point: build workloads and an environment, set up a TF1
    session/graph, then hand off to the asynchronous trainer.

    NOTE(review): requires tensorflow 1.x (`reset_default_graph`,
    `Session`); `Agent` and `async_trainer` are defined elsewhere.
    """
    import params
    import sys
    task_dist = Task_Dist()
    workloads = task_dist.gen_seq_workload()
    pa = params.Params()
    env = Env(0, 1)
    # env.workload_seq = workloads[0]
    import os
    import sys, getopt
    import threading
    import tensorflow as tf
    import numpy as np
    from time import time, sleep
    import random
    #from agent import Agent
    # Train for this many steps
    # T_MAX = 100000000
    # Use this many threads
    #NUM_THREADS = 8
    # Initial learning rate for Adam
    # INITIAL_LEARNING_RATE = 1e-4
    # The discount factor
    # DISCOUNT_FACTOR = 0.99
    # Evaluate the agent and print out average reward every this many steps
    # VERBOSE_EVERY = 40000
    # Update the parameters in each thread after this many steps in that thread
    #I_ASYNC_UPDATE = 5
    # Use this global variable to exit the training loop in each thread once we've finished.
    training_finished = False
    # TF1-style graph/session bootstrap.
    tf.reset_default_graph()
    sess = tf.Session()
    agent = Agent(sess, pa)  # a3c(pa)
    sess.run(tf.global_variables_initializer())
    # create a tensorflow saver object and keep 50000 checkpoints
    saver = tf.train.Saver(max_to_keep=50000)
    async_trainer(agent, env, pa, sess, saver, workloads)
class Env1:
    """Single-resource (CPU) cluster environment: a pool of machines plus a
    task backlog, exposing gym-style observe/step/reset for a scheduler.

    NOTE(review): depends on project types Parameters, Machine, Task_Dist,
    Task defined elsewhere.
    """

    def __init__(self, cur_time, time_step):
        self.params = Parameters()
        self.cur_time = cur_time
        self.time_step = time_step
        self.machines = []
        self.waiting_tasks = []
        # initialize machines
        for i in range(self.params.num_machines):
            self.machines.append(Machine(i, self.params.machine_res_cap))
        self.task_dist = Task_Dist()
        self.workload_seq = None
        self.seq_id = 0

    # Generate workload and populate self.waiting_tasks after each interval
    def generate_workload(self):
        if len(self.workload_seq) <= self.seq_id:
            return
        if self.workload_seq[self.seq_id]:
            for task_type in self.workload_seq[self.seq_id]:
                task_color, task_cpu_limit, task_finish_time = \
                    self.task_dist.get_task_details(task_type)
                # Give the new task a color strictly greater than any waiting
                # or running task of the same service, so instances are
                # distinguishable in the rendered state.
                max_color = task_color
                for tsk in self.waiting_tasks:
                    if task_type == tsk.service:
                        if max_color <= tsk.color:
                            max_color = tsk.color
                for mcn in self.machines:
                    for tsk in mcn.running_tasks:
                        if task_type == tsk.service and max_color <= tsk.color:
                            max_color = tsk.color
                task_color = max_color + 0.01
                self.waiting_tasks.append(
                    Task(task_type, task_color, task_cpu_limit,
                         task_finish_time, self.cur_time))

    def observe(self):
        """Return the environment state as a 2-D image-like matrix:
        machine canvases side by side, then the backlog columns."""
        img_repr = np.zeros((self.params.state_len, self.params.state_width))
        # add machines
        used_width = 0
        for res in range(self.params.num_res):
            for machine in self.machines:
                img_repr[:, used_width:used_width + self.params.machine_res_cap] = \
                    machine.canvas[res, :, :]
                used_width += self.params.machine_res_cap
        # add backlog queue (column-major fill of waiting-task colors)
        if len(self.waiting_tasks) > 0:
            t = 0
            for i in range(self.params.state_len):
                for j in range(self.params.backlog_width):
                    img_repr[i, used_width + j] = self.waiting_tasks[t].color
                    t += 1
                    if (t == len(self.waiting_tasks)):
                        break
                if (t == len(self.waiting_tasks)):
                    break
        used_width += self.params.backlog_width
        assert used_width == self.params.state_width
        return img_repr

    def step(self, action, episode_time, rewards, c, log=False):
        """Apply `action` (machine index, or num_machines for no-op) to
        backlog task `c`; advance time and return (ob, rewards, done, status).
        """
        status = None
        done = False
        reward = 0
        if len(self.waiting_tasks) == 0:
            status = 'Backlog_Empty'
        elif action == self.params.num_machines:
            status = 'Invalid'
        else:
            allocated = self.machines[action].allocate_task(
                self.waiting_tasks[c], episode_time)
            if allocated:
                status = 'Allocation_Success'
                self.waiting_tasks[c].start_time = self.cur_time
                self.waiting_tasks = (self.waiting_tasks[0:c] +
                                      self.waiting_tasks[c + 1:])
            else:
                status = 'Allocation_Failed'
        # BUGFIX: original test was `status == 'Allocation_Success' or
        # 'Invalid' or ...` which is always truthy; use a membership test.
        # (Behavior is unchanged since status is always one of the four.)
        if status in ('Allocation_Success', 'Invalid',
                      'Allocation_Failed', 'Backlog_Empty'):
            self.seq_id += 1
            self.update()
            # TODO fix max no of jobs, so when all jobs complete episode ends
            unfinished = 0
            for machine in self.machines:
                if len(machine.running_tasks) != 0:
                    unfinished += 1
            if unfinished == 0 and len(self.waiting_tasks) == 0:
                done = True
            if self.cur_time > self.params.episode_max_length:
                # run too long, force termination
                done = True
            rewards = self.get_reward(rewards, log)
            self.generate_workload()
        ob = self.observe()
        if done:
            self.reset()
        return ob, rewards, done, status

    def reset(self):
        """Reset clock, machines and backlog (keeps workload_seq)."""
        self.cur_time = 0
        self.machines = []
        self.waiting_tasks = []
        for i in range(self.params.num_machines):
            self.machines.append(Machine(i, self.params.machine_res_cap))
        self.seq_id = 0

    def get_suitable_machines(self, task):
        """Machines with spare CPU strictly above the task's limit."""
        return [
            machine for machine in self.machines
            if machine.cpus_left > task.cpu_limit
        ]

    def update(self):
        """Advance the clock one step and update every machine."""
        self.cur_time += self.time_step
        for machine in self.machines:
            machine.update(self.task_dist, self.cur_time)

    def schedule(self):
        """Best-effort placement of all waiting tasks; unplaced tasks stay
        in the backlog."""
        unscheduled_tasks = []
        for task in self.waiting_tasks:
            # BUGFIX: the suitable-machines lookup was commented out, leaving
            # `suitable_machines` undefined (NameError). Restore it, and keep
            # unplaceable tasks in the backlog instead of dropping them.
            suitable_machines = self.get_suitable_machines(task)
            if not suitable_machines:
                unscheduled_tasks.append(task)
                continue
            machine = self.find_best_machine(task, suitable_machines)
            if (machine is not None):
                task.start_time = self.cur_time
                machine[0].allocate_task(task)
            else:
                unscheduled_tasks.append(task)
        self.waiting_tasks = unscheduled_tasks

    def find_best_machine(self, task, suitable_machines):
        """Coin flip: half the time pick a machine uniformly at random
        (returned as a length-1 array), otherwise return None."""
        if np.random.randint(2):
            return np.random.choice(
                suitable_machines, 1,
                p=[1 / len(suitable_machines)] * len(suitable_machines))
        else:
            return None

    def get_reward(self, rewards, log):
        """Append this step's reward and retroactively charge overshoot /
        interference penalties to the steps that scheduled the tasks."""
        rewards.append(0)
        # Penalty for putting a task on hold
        rewards[-1] += self.params.hold_penalty * len(self.waiting_tasks) / (
            self.params.state_len * self.params.state_width
        )  # TODO add some penalty factor
        # Penalty using cross co-relation of cpu_util
        for i, machine in enumerate(self.machines):
            if len(machine.running_tasks) == 1 and len(
                    machine.running_tasks[0].cpu_util) == 1:
                # First sample on a fresh machine: charge machine-used penalty.
                rewards[-1] += self.params.machine_used_penalty / (
                    self.params.state_len * self.params.state_width)
            if machine.cpus_left < 0 and not machine.running_tasks[-1].already_overshoot:
                if log:
                    print(i + 1, 'Overshoot', self.params.overshoot_penalty)
                machine.running_tasks[-1].already_overshoot = True
                # Charge the step that scheduled the overshooting task.
                rewards[machine.running_tasks[-1].episode_time] += \
                    self.params.overshoot_penalty / (
                        self.params.state_len * self.params.state_width)
            for task in machine.running_tasks:
                for tsk in task.conf_at_scheduling:
                    if tsk in machine.running_tasks:
                        rewards[task.episode_time] += \
                            self.params.interference_penalty * (
                                task.cpu_util[-1] * tsk.cpu_util[-1]) / (
                                self.params.state_len * self.params.state_width)
        return rewards
def main():
    """Evaluate the heuristic agent on every training example, logging
    per-timestep CPU/memory utilisation, interference cross-correlations,
    and summary machine counts to a flat log file.

    NOTE(review): log line layout is consumed by downstream parsing —
    do not change any string literal here.
    """
    agent = Heuristic_Agents()
    env = Env(0, 1)
    task_dist = Task_Dist()
    delay = 0
    workloads = task_dist.gen_seq_workload()
    logs = open('/home/dell/logs_cpu_mem_tetris_23ex', 'a')
    no_machines = []
    logline = str(0) + '\n'
    ex_indices = []
    for ex in range(agent.params.num_ex):
        env.reset()
        env.workload_seq = workloads[ex]
        # for i in range(len(workloads[ex+agent.params.num_ex])):
        env.generate_workload()
        # env.seq_id += 1
        print('Testing : ', env.workload_seq)
        ob = env.observe()
        rews = []
        acts = []
        # NOTE(review): hard-coded 10 — presumably agent.params.num_machines;
        # confirm before changing cluster size.
        cpu_crs = [0] * 10
        cpu_crs_max = [0] * 10
        mem_crs = [0] * 10
        mem_crs_max = [0] * 10
        c_utils = ''
        m_utils = ''
        suffer = []
        # Fixed seed so the heuristic's random tie-breaks are reproducible.
        np.random.seed(20)
        for _ in range(agent.params.episode_max_length):
            a, task, task_id = agent.get_action(env)
            acts.append(a)
            #if ex==0:
            #    plt1 = visualize_state(ob, agent.params, '/home/dell/trajs/worst_fit20/episode_%d' % int(_))
            ob, rews, done, status = env.step(a, _, rews, task_id)
            if status == 'Allocation_Success':
                # NOTE(review): finished_episode_len stays unbound if no
                # allocation ever succeeds; the prints below would then raise.
                finished_episode_len = _ + 1
                # print('Example>>>',ex)
                # print('Service Name>>',task.service)
                # print('Entry time>>',task.enter_time)
                # Accumulate queueing delay of the task just scheduled.
                delay += task.start_time - task.enter_time
                # print('Scheduling Time>>',task.start_time)
                # print('Delay>>>>',task.start_time - task.enter_time)
            if done:
                break
            # -------- per-timestep utilisation / interference logging --------
            c_util = ''
            m_util = ''
            for k, machine in enumerate(env.machines):
                if len(machine.running_tasks) > 0:
                    if machine.cpus_left >= 0:
                        c_util += str(machine.total_cpus - machine.cpus_left) + ','
                    else:
                        # Oversubscribed: report capacity + overshoot amount.
                        c_util += str(machine.total_cpus + abs(machine.cpus_left)) + ','
                        suffer.append(abs(machine.cpus_left))
                else:
                    c_util += str(0) + ','
            for k, machine in enumerate(env.machines):
                if len(machine.running_tasks) > 0:
                    if machine.mems_left >= 0:
                        m_util += str(machine.total_mems - machine.mems_left) + ','
                    else:
                        m_util += str(machine.total_mems + abs(machine.mems_left)) + ','
                        suffer.append(abs(machine.mems_left))
                else:
                    m_util += str(0) + ','
                # Pairwise utilisation products of co-located tasks, negated
                # per the interference-penalty convention.
                cpu_crs_this_time = [0] * agent.params.num_machines
                mem_crs_this_time = [0] * agent.params.num_machines
                for i in range(len(machine.running_tasks)):
                    for j in range(i + 1, len(machine.running_tasks)):
                        task_i, task_j = machine.running_tasks[i], machine.running_tasks[j]
                        if task_i != task_j and len(task_i.cpu_util) > 0 and len(task_j.cpu_util) > 0:
                            cpu_crs[k] += agent.params.interference_penalty_cpu * (task_i.cpu_util[-1] * task_j.cpu_util[-1]) * (-1)
                            cpu_crs_this_time[k] += agent.params.interference_penalty_cpu * (task_i.cpu_util[-1] * task_j.cpu_util[-1]) * (-1)
                        if task_i != task_j and len(task_i.mem_util) > 0 and len(task_j.mem_util) > 0:
                            mem_crs[k] += agent.params.interference_penalty_mem * (task_i.mem_util[-1] * task_j.mem_util[-1]) * (-1)
                            mem_crs_this_time[k] += agent.params.interference_penalty_mem * (task_i.mem_util[-1] * task_j.mem_util[-1]) * (-1)
                cpu_crs_max[k] = max(cpu_crs_max[k], cpu_crs_this_time[k])
                mem_crs_max[k] = max(mem_crs_max[k], mem_crs_this_time[k])
            #################
            c_utils += c_util + '|'
            m_utils += m_util + '|'
        # -------- per-example summary lines --------
        logline += str(str(_ - 1) + '|' + str(c_utils) + str(finished_episode_len)) + '\n' + str(sum(rews)) + '\n' + str(sum(suffer)) + '\n'
        logline += str(m_utils) + '\n'
        for i in range(len(env.machines)):
            logline += str(cpu_crs[i]) + ','
        logline = logline[:-1] + '\n'
        for i in range(len(env.machines)):
            logline += str(cpu_crs_max[i]) + ','
        logline = logline[:-1]
        logline += '\n'
        for i in range(len(env.machines)):
            logline += str(mem_crs[i]) + ','
        logline = logline[:-1] + '\n'
        for i in range(len(env.machines)):
            logline += str(mem_crs_max[i]) + ','
        logline = logline[:-1]
        logline += '\n'
        print('Test Actions: ', acts[:finished_episode_len])
        print('Reward : ', rews)
        print('Reward : ', sum(rews))
        # -1 discounts the no-op action from the distinct-action count.
        print('Number of machines used>>', len(set(acts[:finished_episode_len])) - 1)
        no_machines.append(len(set(acts[:finished_episode_len])) - 1)
        if len(set(acts[:finished_episode_len])) - 1 == 8 or len(set(acts[:finished_episode_len])) - 1 == 9:
            ex_indices.append(ex)
    print('Average Delay>>>', delay / agent.params.num_test_ex)
    print('Number of examples with 6 machines>>>', no_machines.count(6))
    print('Number of examples with 7 machines>>>', no_machines.count(7))
    print('Number of examples with 8 machines>>>', no_machines.count(8))
    print('Number of examples with 9 machines>>>', no_machines.count(9))
    print('Number of examples with 10 machines>>>', no_machines.count(10))
    logs.write(logline)
    logs.flush()
    # import pickle
    # with open("workloads8_9_threecombo_68_random_indices.txt", "wb") as fp: #Pickling
    #     pickle.dump(ex_indices, fp)
    os.fsync(logs.fileno())
def launch(pa, pg_resume=None, render=False, repre='image', end='no_new_job'):
    """Train policy-gradient learners over `pa.num_ex` environments using a
    pool of worker processes; batch gradients, apply an RMSProp update, and
    periodically checkpoint and test.

    Parameters
    ----------
    pa : project Params object (num_ex, batch_size, lr_rate, ...).
    pg_resume : optional path to a pickled parameter file to resume from.
    render, repre, end : forwarded to environment construction.

    NOTE(review): depends on project names Task_Dist, Dist, environment,
    pg_network, pickle, Manager/Process, get_traj_worker,
    concatenate_all_ob_across_examples, init_accums,
    rmsprop_updates_outside, test.
    """
    task_dist = Task_Dist()
    workloads = task_dist.gen_seq_workload()
    # ----------------------------
    print("Preparing for workers...")
    # ----------------------------
    pg_learners = []
    envs = []
    job_distribution = Dist()
    nw_len_seqs, nw_size_seqs = job_distribution.generate_sequence_work(
        pa, seed=42)
    for ex in range(pa.num_ex):
        print("-prepare for env-", ex)
        env = environment.Env(pa,
                              nw_len_seqs=nw_len_seqs,
                              nw_size_seqs=nw_size_seqs,
                              render=False,
                              repre=repre,
                              end=end)
        env.seq_no = ex
        envs.append(env)
    # Index pa.batch_size is the designated parameter-holder learner.
    for ex in range(pa.batch_size + 1):  # last worker for updating the parameters
        print("-prepare for worker-", ex)
        pg_learner = pg_network.PGLearner(pa)
        if pg_resume is not None:
            net_handle = open(pg_resume, 'rb')
            net_params = pickle.load(net_handle)
            pg_learner.set_net_params(net_params)
        pg_learners.append(pg_learner)
    accums = init_accums(pg_learners[pa.batch_size])
    # --------------------------------------
    print("Start training...")
    # --------------------------------------
    timer_start = time.time()
    for iteration in range(1, pa.num_epochs):
        ps = []  # worker processes
        manager = Manager()  # managing return results
        manager_result = manager.list([])
        ex_indices = range(pa.num_ex)
        # np.random.shuffle(ex_indices)
        all_eprews = []
        grads_all = []
        loss_all = []
        eprews = []
        eplens = []
        all_slowdown = []
        all_entropy = []
        ex_counter = 0
        for ex in range(pa.num_ex):
            ex_idx = ex_indices[ex]
            p = Process(target=get_traj_worker,
                        args=(pg_learners[ex_counter], envs[ex_idx], pa,
                              manager_result, ))
            ps.append(p)
            ex_counter += 1
            # Launch a full batch (or the final partial batch) of workers.
            if ex_counter >= pa.batch_size or ex == pa.num_ex - 1:
                print(ex, "out of", pa.num_ex)
                ex_counter = 0
                for p in ps:
                    p.start()
                for p in ps:
                    p.join()
                result = []  # convert list from shared memory
                for r in manager_result:
                    result.append(r)
                ps = []
                manager_result = manager.list([])
                all_ob = concatenate_all_ob_across_examples(
                    [r["all_ob"] for r in result], pa)
                all_action = np.concatenate([r["all_action"] for r in result])
                all_adv = np.concatenate([r["all_adv"] for r in result])
                # Compute gradients on the parameter-holder learner; the
                # update is applied once after all batches.
                grads = pg_learners[pa.batch_size].get_grad(
                    all_ob, all_action, all_adv)
                grads_all.append(grads)
                all_eprews.extend([r["all_eprews"] for r in result])
                eprews.extend(np.concatenate(
                    [r["all_eprews"] for r in result]))  # episode total rewards
                eplens.extend(np.concatenate(
                    [r["all_eplens"] for r in result]))  # episode lengths
        # assemble gradients across batches
        grads = grads_all[0]
        for i in range(1, len(grads_all)):
            for j in range(len(grads)):
                grads[j] += grads_all[i][j]
        # propagate network parameters to all workers
        params = pg_learners[pa.batch_size].get_params()
        rmsprop_updates_outside(grads, params, accums, pa.lr_rate, pa.rms_rho,
                                pa.rms_eps)
        for i in range(pa.batch_size + 1):
            pg_learners[i].set_net_params(params)
        timer_end = time.time()
        print("-----------------")
        print("Iteration: \t %i" % iteration)
        print("NumTrajs: \t %i" % len(eprews))
        print("NumTimesteps: \t %i" % np.sum(eplens))
        print("MaxRew: \t %s" % np.average([np.max(rew) for rew in all_eprews]))
        print("MeanRew: \t %s +- %s" % (np.mean(eprews), np.std(eprews)))
        print("MeanLen: \t %s +- %s" % (np.mean(eplens), np.std(eplens)))
        print("Elapsed time\t %s" % (timer_end - timer_start), "seconds")
        print("-----------------")
        if iteration % pa.output_freq == 0:
            pg_resume = pa.output_filename + '_' + str(iteration) + '.pkl'
            param_file = open(pg_resume, 'wb')
            # BUGFIX: original dumped `pg_learner.get_params()` via the leaked
            # loop variable from worker setup; name the parameter-holder
            # learner explicitly so the saved checkpoint is unambiguous.
            pickle.dump(pg_learners[pa.batch_size].get_params(), param_file, -1)
            param_file.close()
            test(pa, pg_resume, workloads, repre)
def test(pa):
    """Evaluate heuristic learners on the held-out test examples in
    parallel worker processes, appending per-example log lines to /tmp/logs3.

    NOTE(review): depends on project names Task_Dist, Env, Heuristic_Agents,
    Manager/Process defined elsewhere.
    """
    def ex_test(pg_learner, env, pa, result):
        # Run one full episode on `env` and push its formatted log line
        # into the shared `result` list (runs in a child process).
        env.reset()
        env.generate_workload()
        ob = env.observe()
        acts = []
        probs = []
        crs = [0] * pa.num_machines       # cumulative interference per machine
        crs_max = [0] * pa.num_machines   # max single-step interference
        rews = []
        utils = ''
        suffer = []
        finished_episode_len = 0
        logline = ''
        for _ in range(pa.episode_max_length):
            a = pg_learner.get_action(env)
            ob, rews, done, status = env.step(a, _, rews)
            acts.append(a)
            if status == 'Allocation_Success':
                finished_episode_len = _ + 1
            if done:
                break
            ##############logs
            util = ''
            for k, machine in enumerate(env.machines):
                if len(machine.running_tasks) > 0:
                    if machine.cpus_left >= 0:
                        util += str(machine.total_cpus - machine.ac_cpus_left) + ','
                    else:
                        util += str(machine.total_cpus) + ','
                    if machine.ac_cpus_left < 0:
                        suffer.append(abs(machine.ac_cpus_left))
                else:
                    util += str(0) + ','
                # Pairwise CPU-utilisation products of co-located tasks,
                # negated per the interference-penalty convention.
                crs_this_time = [0] * pa.num_machines
                for i in range(len(machine.running_tasks)):
                    for j in range(i + 1, len(machine.running_tasks)):
                        task_i, task_j = machine.running_tasks[
                            i], machine.running_tasks[j]
                        if task_i != task_j:
                            crs[k] += pa.interference_penalty * (
                                task_i.cpu_util[-1] * task_j.cpu_util[-1]) * (-1)
                            crs_this_time[k] += pa.interference_penalty * (
                                task_i.cpu_util[-1] * task_j.cpu_util[-1]) * (-1)
                crs_max[k] = max(crs_max[k], crs_this_time[k])
            #################
            utils += util + '|'
        # Assemble the per-example log line (fixed layout for parsers).
        logline += str(
            str(_ - 1) + '|' + str(utils) +
            str(finished_episode_len)) + '\n' + str(sum(rews)) + '\n' + str(
                sum(suffer)) + '\n'
        for i in range(len(env.machines)):
            logline += str(crs[i]) + ','
        logline = logline[:-1] + '\n'
        for i in range(len(env.machines)):
            logline += str(crs_max[i]) + ','
        logline = logline[:-1]
        logline += '\n'
        result.append(logline)

    pg_learners = []
    envs = []
    task_dist = Task_Dist()
    workloads = task_dist.gen_seq_workload()
    # Test examples start after the training examples in the workload list.
    for ex in range(pa.num_test_ex):
        print("-prepare for env-", ex)
        env = Env(0, 1)
        env.workload_seq = workloads[ex + pa.num_ex]
        envs.append(env)
    for ex in range(pa.batch_size):
        # last worker for updating the parameters
        print("-prepare for worker-", ex)
        pg_learner = Heuristic_Agents()
        pg_learners.append(pg_learner)
    logs = open('/tmp/logs3', 'a')
    loglines = ''
    for it in range(2, pa.num_epochs + 1, 2):
        if (it % 10):
            print('Iteration : ', it)
        ps = []  # worker processes
        manager = Manager()  # managing return results
        manager_result = manager.list([])
        ex_counter = 0
        loglines += str(it) + '\n'
        for ex in range(pa.num_test_ex):
            p = Process(target=ex_test,
                        args=(pg_learners[ex_counter], envs[ex], pa,
                              manager_result, ))
            ps.append(p)
            ex_counter += 1
            # Launch a full batch (or final partial batch) of workers.
            if ex_counter >= pa.batch_size or ex == pa.num_test_ex - 1:
                ex_counter = 0
                for p in ps:
                    p.start()
                for p in ps:
                    p.join()
                # convert list from shared memory
                for r in manager_result:
                    loglines += r
                ps = []
                manager_result = manager.list([])
    logs.write(loglines)
class Env:
    """Two-resource (CPU + memory) cluster environment with gym-style
    observe/step/reset and a reward combining hold, spare-capacity,
    overshoot and interference penalties.

    NOTE(review): depends on project types Params, Machine, Task_Dist, Task.
    """

    def __init__(self, cur_time, time_step):
        self.params = Params()
        self.cur_time = cur_time
        self.time_step = time_step
        self.machines = []
        self.waiting_tasks = []
        # initialize machines: machine_res_cap[0]=CPU, [1]=memory capacity
        for i in range(self.params.num_machines):
            self.machines.append(
                Machine(i, self.params.machine_res_cap[0],
                        self.params.machine_res_cap[1]))
        self.task_dist = Task_Dist()
        self.workload_seq = None
        self.seq_id = 0

    # Generate workload and populate self.waiting_tasks after each interval
    def generate_workload(self):
        if len(self.workload_seq) <= self.seq_id:
            return
        if self.workload_seq[self.seq_id]:
            for task_type in self.workload_seq[self.seq_id]:
                task_color, task_cpu_limit, task_mem_limit, task_finish_time = \
                    self.task_dist.get_task_details(task_type)
                # Give the new task a color strictly greater than any waiting
                # or running task of the same service.
                max_color = task_color
                for tsk in self.waiting_tasks:
                    if task_type == tsk.service:
                        if max_color <= tsk.color:
                            max_color = tsk.color
                for mcn in self.machines:
                    for tsk in mcn.running_tasks:
                        if task_type == tsk.service and max_color <= tsk.color:
                            max_color = tsk.color
                task_color = max_color + 0.01
                self.waiting_tasks.append(
                    Task(task_type, task_color, task_cpu_limit, task_mem_limit,
                         task_finish_time, self.cur_time))

    # return the state of environment as 2D-matrix
    def observe(self):
        img_repr = np.zeros((self.params.state_len, self.params.state_width))
        # add machines: canvas1 = CPU plane, canvas2 = memory plane, with a
        # one-column separator after each machine.
        used_width = 0
        for res in range(self.params.num_res):
            for machine in self.machines:
                if res == 0:
                    img_repr[:, used_width:used_width +
                             self.params.machine_res_cap[res]] = machine.canvas1[0, :, :]
                else:
                    img_repr[:, used_width:used_width +
                             self.params.machine_res_cap[res]] = machine.canvas2[0, :, :]
                used_width += self.params.machine_res_cap[res] + 1
        # add backlog queue (column-major fill of waiting-task colors)
        if len(self.waiting_tasks) > 0:
            t = 0
            for i in range(self.params.state_len):
                for j in range(self.params.backlog_width):
                    img_repr[i, used_width + j] = self.waiting_tasks[t].color
                    t += 1
                    if (t == len(self.waiting_tasks)):
                        break
                if (t == len(self.waiting_tasks)):
                    break
        used_width += self.params.backlog_width
        assert used_width == self.params.state_width
        # Write running-task colors into each machine's separator column.
        k = -1
        for res in range(self.params.num_res):
            for machine in self.machines:
                k += self.params.machine_res_cap[res] + 1
                j = 0
                for m in machine.running_tasks:
                    img_repr[j, k] = m.color + machine.mid
                    j += 1
        return img_repr

    def _episode_done(self):
        """True when the cluster is fully drained, or the clock exceeds the
        episode limit (forced termination)."""
        unfinished = 0
        for machine in self.machines:
            if len(machine.running_tasks) != 0:
                unfinished += 1
        if unfinished == 0 and len(self.waiting_tasks) == 0:
            return True
        return self.cur_time > self.params.episode_max_length

    # changes the state of system by taking the action and returns the
    # rewards, new state due to that action
    def step(self, action, episode_time, rewards, task_no):
        status = None
        done = False
        reward = 0
        if len(self.waiting_tasks) == 0:
            status = 'Backlog_Empty'
        elif action == self.params.num_machines:
            status = 'Invalid'
        else:
            # BUGFIX: the original allocated waiting_tasks[0] but stamped and
            # removed waiting_tasks[task_no]; allocate the same task that is
            # being removed from the backlog.
            allocated = self.machines[action].allocate_task(
                self.waiting_tasks[task_no], episode_time)
            if allocated:
                status = 'Allocation_Success'
                self.waiting_tasks[task_no].start_time = self.cur_time
                self.waiting_tasks = (self.waiting_tasks[0:task_no] +
                                      self.waiting_tasks[task_no + 1:])
            else:
                status = 'Allocation_Failed'
        if (status == 'Invalid') or (status == 'Allocation_Failed') or (
                status == 'Backlog_Empty'):
            # No allocation this step: advance time and pull new workload.
            self.seq_id += 1
            self.update()
            done = self._episode_done()
            rewards = self.get_reward(rewards)
            self.generate_workload()
        else:
            # Successful allocation: time does not advance, no new workload.
            done = self._episode_done()
            rewards = self.get_reward(rewards)
        ob = self.observe()
        if done:
            self.reset()
        return ob, rewards, done, status

    # reset the system by making all machines empty and time to 0
    def reset(self):
        self.cur_time = 0
        self.machines = []
        self.waiting_tasks = []
        for i in range(self.params.num_machines):
            self.machines.append(
                Machine(i, self.params.machine_res_cap[0],
                        self.params.machine_res_cap[1]))
        self.seq_id = 0

    # update time, status of running tasks in every machine
    def update(self):
        self.cur_time += self.time_step
        for machine in self.machines:
            machine.update(self.task_dist, self.cur_time)

    def get_reward(self, rewards):
        """Append this step's reward and retroactively charge overshoot and
        interference penalties to the steps that scheduled the tasks."""
        rewards.append(0)
        # Penalty for putting a task on hold
        rewards[-1] += self.params.hold_penalty * len(
            self.waiting_tasks)  # TODO add some penalty factor
        for i, machine in enumerate(self.machines):
            tasks = []
            # Penalize spare capacity on machines that are in use (encourages
            # packing); exponent comes from machine_used_penalty.
            if len(machine.running_tasks) > 0 and machine.cpus_left > 0:
                rewards[-1] += (-1) * pow(machine.cpus_left,
                                          self.params.machine_used_penalty)
            if len(machine.running_tasks) > 0 and machine.mems_left > 0:
                rewards[-1] += (-1) * pow(machine.mems_left,
                                          self.params.machine_used_penalty)
            # CPU overshoot: walk tasks newest-first; a task is blamed if the
            # machine would still be over capacity even after removing the
            # usage of all tasks scheduled after it.
            for j, task in enumerate(reversed(machine.running_tasks)):
                tasks.append(task)
                if j == 0:
                    if machine.cpus_left < 0 and not task.already_overshoot_cpu:
                        print(i + 1, 'OvershootA_CPU', abs(machine.cpus_left))
                        task.already_overshoot_cpu = True
                        rewards[task.episode_time] += self.params.overshoot_penalty
                else:
                    # BUGFIX(idiom): local accumulator no longer shadows the
                    # builtin `sum`.
                    util_sum = 0
                    for m in tasks[0:j]:
                        if len(m.cpu_util) > 0:
                            util_sum += m.cpu_util[-1]
                    if (machine.cpus_left + util_sum) < 0 and not task.already_overshoot_cpu:
                        print(i + 1, 'OvershootB_CPU',
                              abs(machine.cpus_left + util_sum))
                        task.already_overshoot_cpu = True
                        rewards[task.episode_time] += self.params.overshoot_penalty
            # Memory overshoot: same scheme as CPU.
            tasks = []
            for j, task in enumerate(reversed(machine.running_tasks)):
                tasks.append(task)
                if j == 0:
                    if machine.mems_left < 0 and not task.already_overshoot_mem:
                        print(i + 1, 'OvershootA_MEM', abs(machine.mems_left))
                        task.already_overshoot_mem = True
                        rewards[task.episode_time] += self.params.overshoot_penalty
                else:
                    util_sum = 0
                    for m in tasks[0:j]:
                        if len(m.mem_util) > 0:
                            util_sum += m.mem_util[-1]
                    if (machine.mems_left + util_sum) < 0 and not task.already_overshoot_mem:
                        print(i + 1, 'OvershootB_MEM',
                              abs(machine.mems_left + util_sum))
                        task.already_overshoot_mem = True
                        rewards[task.episode_time] += self.params.overshoot_penalty
            # Interference: charge a task's scheduling step for each conflict
            # partner still running on the same machine.
            for task in machine.running_tasks:
                for tsk in task.conf_at_scheduling:
                    if tsk in machine.running_tasks:
                        if len(task.cpu_util) > 0 and len(tsk.cpu_util) > 0:
                            rewards[task.episode_time] += \
                                self.params.interference_penalty_cpu * (
                                    task.cpu_util[-1] * tsk.cpu_util[-1])
                        if len(task.mem_util) > 0 and len(tsk.mem_util) > 0:
                            rewards[task.episode_time] += \
                                self.params.interference_penalty_mem * (
                                    task.mem_util[-1] * tsk.mem_util[-1])
        return rewards
def test2(pa):
    """Evaluate saved policy-network checkpoints on the training examples in
    parallel worker processes, logging per-example CPU/memory utilisation and
    interference statistics.

    NOTE(review): depends on project names Task_Dist, Env, policy_network,
    pickle, np, Manager/Process defined elsewhere.
    """
    def ex_test(pg_learner, env, pa, result):
        # Run one greedy (argmax) episode and push its formatted log line
        # into the shared `result` list (runs in a child process).
        env.reset()
        env.generate_workload()
        ob = env.observe()
        acts = []
        probs = []
        cpu_crs = [0] * pa.num_machines
        cpu_crs_max = [0] * pa.num_machines
        mem_crs = [0] * pa.num_machines
        mem_crs_max = [0] * pa.num_machines
        rews = []
        c_utils = ''
        m_utils = ''
        suffer = []
        finished_episode_len = 0
        logline = ''
        for _ in range(pa.episode_max_length):
            act_prob = pg_learner.get_one_act_prob(ob)
            csprob_n = np.cumsum(act_prob)
            # Greedy action (csprob_n kept for parity with sampling variant).
            a = np.argmax(act_prob)
            ob, rews, done, status = env.step(a, _, rews)
            acts.append(a)
            probs.append(act_prob)
            if status == 'Allocation_Success':
                finished_episode_len = _ + 1
            if done:
                break
            ##############logs
            c_util = ''
            m_util = ''
            for k, machine in enumerate(env.machines):
                if len(machine.running_tasks) > 0:
                    if machine.cpus_left >= 0:
                        c_util += str(machine.total_cpus - machine.cpus_left) + ','
                    else:
                        c_util += str(machine.total_cpus) + ','
                        suffer.append(abs(machine.cpus_left))
                else:
                    c_util += str(0) + ','
            for k, machine in enumerate(env.machines):
                if len(machine.running_tasks) > 0:
                    if machine.mems_left >= 0:
                        m_util += str(machine.total_mems - machine.mems_left) + ','
                    else:
                        m_util += str(machine.total_mems) + ','
                        suffer.append(abs(machine.mems_left))
                else:
                    m_util += str(0) + ','
                # Pairwise CPU-utilisation interference (negated convention).
                cpu_crs_this_time = [0] * pa.num_machines
                for i in range(len(machine.running_tasks)):
                    for j in range(i + 1, len(machine.running_tasks)):
                        task_i, task_j = machine.running_tasks[i], machine.running_tasks[j]
                        if task_i != task_j and len(task_i.cpu_util) > 0 and len(task_j.cpu_util) > 0:
                            cpu_crs[k] += pa.interference_penalty_cpu * (task_i.cpu_util[-1] * task_j.cpu_util[-1]) * (-1)
                            cpu_crs_this_time[k] += pa.interference_penalty_cpu * (task_i.cpu_util[-1] * task_j.cpu_util[-1]) * (-1)
                cpu_crs_max[k] = max(cpu_crs_max[k], cpu_crs_this_time[k])
                #################
                # Pairwise memory-utilisation interference.
                mem_crs_this_time = [0] * pa.num_machines
                for i in range(len(machine.running_tasks)):
                    for j in range(i + 1, len(machine.running_tasks)):
                        task_i, task_j = machine.running_tasks[i], machine.running_tasks[j]
                        if task_i != task_j and len(task_i.mem_util) > 0 and len(task_j.mem_util) > 0:
                            mem_crs[k] += pa.interference_penalty_mem * (task_i.mem_util[-1] * task_j.mem_util[-1]) * (-1)
                            mem_crs_this_time[k] += pa.interference_penalty_mem * (task_i.mem_util[-1] * task_j.mem_util[-1]) * (-1)
                mem_crs_max[k] = max(mem_crs_max[k], mem_crs_this_time[k])
            #################
            c_utils += c_util + '|'
            m_utils += m_util + '|'
        # Assemble the per-example log line (fixed layout for parsers).
        logline += str(str(_ - 1) + '|' + str(c_utils) + str(finished_episode_len)) + '\n' + str(sum(rews)) + '\n' + str(sum(suffer)) + '\n'
        logline += str(m_utils) + '\n'
        for i in range(len(env.machines)):
            logline += str(cpu_crs[i]) + ','
        logline = logline[:-1] + '\n'
        for i in range(len(env.machines)):
            logline += str(cpu_crs_max[i]) + ','
        logline = logline[:-1]
        logline += '\n'
        for i in range(len(env.machines)):
            logline += str(mem_crs[i]) + ','
        logline = logline[:-1] + '\n'
        for i in range(len(env.machines)):
            logline += str(mem_crs_max[i]) + ','
        logline = logline[:-1]
        logline += '\n'
        result.append(logline)

    pg_learners = []
    envs = []
    task_dist = Task_Dist()
    workloads = task_dist.gen_seq_workload()
    for ex in range(pa.num_ex):
        print("-prepare for env-", ex)
        env = Env(0, 1)
        env.workload_seq = workloads[ex]
        envs.append(env)
    for ex in range(pa.batch_size):
        # last worker for updating the parameters
        print("-prepare for worker-", ex)
        pg_learner = policy_network.PGLearner(pa)
        pg_learners.append(pg_learner)
    logs = open('/home/dell/logs_cpu_mem_aischedule', 'a')
    loglines = ''
    for it in range(1300, 1320, 20):
        if (it % 10):
            print('Iteration : ', it)
        # Load this checkpoint's parameters into every worker learner.
        pg_resume = '/home/dell/testing_part2/' + str(it) + '.pkl_'
        net_handle = open(pg_resume, 'rb')
        net_params = pickle.load(net_handle)
        for ex in range(pa.batch_size):
            pg_learners[ex].set_net_params(net_params)
        ps = []  # worker processes
        manager = Manager()  # managing return results
        manager_result = manager.list([])
        ex_counter = 0
        loglines += str(it) + '\n'
        for ex in range(pa.num_ex):
            p = Process(target=ex_test,
                        args=(pg_learners[ex_counter], envs[ex], pa,
                              manager_result, ))
            ps.append(p)
            ex_counter += 1
            # NOTE(review): guard compares against pa.num_test_ex - 1 although
            # this loop runs over pa.num_ex — presumably should be
            # pa.num_ex - 1; confirm against the analogous loop in test().
            if ex_counter >= pa.batch_size or ex == pa.num_test_ex - 1:
                ex_counter = 0
                for p in ps:
                    p.start()
                for p in ps:
                    p.join()
                # convert list from shared memory
                for r in manager_result:
                    loglines += r
                ps = []
                manager_result = manager.list([])
    logs.write(loglines)
def launch(pa, pg_resume=None, save_freq=50, render=False, repre='image', end='no_new_job', test_only=False):
    """Train policy-gradient learners over example workloads with multiprocess rollouts.

    Creates one Env per training example and ``pa.batch_size + 1`` PGLearner
    workers; the extra learner (``pg_learners[pa.batch_size]``) holds the
    master parameters used for gradient computation and RMSProp updates.
    Each epoch, trajectories are collected in parallel via ``get_traj_worker``
    processes, per-batch gradients are accumulated, summed, applied on the
    master, and the updated parameters are broadcast to every worker.

    Args:
        pa: hyper-parameter object (reads num_ex, batch_size, num_epochs,
            lr_rate, rms_rho, rms_eps).
        pg_resume: optional path to a pickled parameter file to warm-start from.
        save_freq: unused in this body; kept for interface compatibility.
        render: unused in this body; kept for interface compatibility.
        repre: unused in this body; kept for interface compatibility.
        end: unused in this body; kept for interface compatibility.
        test_only: if True, run a single test pass and return immediately.

    Side effects: appends to '/tmp/logs', writes parameter checkpoints under
    '/home/dell/testing_part2/', prints progress to stdout.
    """
    task_dist = Task_Dist()
    workloads = task_dist.gen_seq_workload()

    if test_only:
        test(0, pa, pg_resume, workloads)
        return

    # One environment per training example, each pinned to its workload.
    envs = []
    for ex in range(pa.num_ex):
        print("-prepare for env-", ex)
        env = Env(0, 1)
        env.workload_seq = workloads[ex]
        envs.append(env)

    # batch_size rollout workers + 1 master learner for updating the parameters.
    pg_learners = []
    for ex in range(pa.batch_size + 1):
        print("-prepare for worker-", ex)
        pg_learner = policy_network.PGLearner(pa)
        if pg_resume is not None:
            # Fix: close the checkpoint file (was leaked via a bare open()).
            with open(pg_resume, 'rb') as net_handle:
                net_params = pickle.load(net_handle)
            pg_learner.set_net_params(net_params)
        pg_learners.append(pg_learner)

    accums = init_accums(pg_learners[pa.batch_size])

    print('Preparing for Training from Scratch...')
    # ref_discount_rews=slow_down_cdf.launch(pa,pg_resume=None,render=False,repre=repre,end=end)

    timer_start = time.time()
    logs = open('/tmp/logs', 'a')
    loglines = ''

    for iteration in range(1, pa.num_epochs + 1):
        ps = []                           # rollout processes of the current batch
        manager = Manager()               # managing return results
        manager_result = manager.list([])
        ex_indices = list(range(pa.num_ex))
        # np.random.shuffle(ex_indices)

        grads_all = []   # per-batch gradients; summed after the example loop
        eprews = []      # episode total rewards (flattened)
        eplens = []      # episode lengths (flattened)
        all_eprews = []  # per-batch reward arrays

        ex_counter = 0
        for ex in range(pa.num_ex):
            ex_idx = ex_indices[ex]
            p = Process(target=get_traj_worker,
                        args=(pg_learners[ex_counter], envs[ex_idx], pa, manager_result, ))
            ps.append(p)
            ex_counter += 1

            # Launch a full batch (or the final partial batch) of workers.
            if ex_counter >= pa.batch_size or ex == pa.num_ex - 1:
                print(ex + 1, "out of", pa.num_ex)
                ex_counter = 0
                for p in ps:
                    p.start()
                for p in ps:
                    p.join()

                # Copy results out of shared memory before resetting it.
                result = [r for r in manager_result]
                ps = []
                manager_result = manager.list([])

                all_ob = concatenate_all_ob_across_examples([r["all_ob"] for r in result], pa)
                all_action = np.concatenate([r["all_action"] for r in result])
                all_adv = np.concatenate([r["all_adv"] for r in result])

                # Do policy gradient update step, using the master learner;
                # the new parameters are propagated to the workers at the end.
                grads = pg_learners[pa.batch_size].get_grad(all_ob, all_action, all_adv)
                grads_all.append(grads)

                all_eprews.extend([r["all_eprews"] for r in result])
                eprews.extend(np.concatenate([r["all_eprews"] for r in result]))  # episode total rewards
                eplens.extend(np.concatenate([r["all_eplens"] for r in result]))  # episode lengths

        # Assemble gradients: element-wise sum of the per-batch gradients.
        grads = grads_all[0]
        for i in range(1, len(grads_all)):
            for j in range(len(grads)):
                grads[j] += grads_all[i][j]

        # Apply RMSProp on the master parameters, then propagate to all workers.
        params = pg_learners[pa.batch_size].get_params()
        rmsprop_updates_outside(grads, params, accums, pa.lr_rate, pa.rms_rho, pa.rms_eps)
        for i in range(pa.batch_size + 1):
            pg_learners[i].set_net_params(params)

        timer_end = time.time()
        print("-----------------")
        print("Iteration: \t %i" % iteration)
        print("NumTrajs: \t %i" % len(eprews))
        print("NumTimesteps: \t %i" % np.sum(eplens))
        print("Elapsed time\t %s" % (timer_end - timer_start), "seconds")
        print("-----------------")
        # time.sleep(5)

        pg_resume = '/home/dell/testing_part2/%s_10ex.pkl_' % str(iteration)
        if iteration % 10 == 0:
            # Checkpoint the master parameters (fix: file now closed via `with`).
            with open(pg_resume, 'wb') as param_file:
                pickle.dump(pg_learners[pa.batch_size].get_params(), param_file, -1)
        if iteration % 20 == 0:
            # Fix: the two identical `iteration % 20` branches folded into one.
            loglines += test(iteration, pa, pg_resume, workloads, pg_learners[pa.batch_size])
            logs.write(loglines)
            logs.flush()
            os.fsync(logs.fileno())
            loglines = ''

    logs.close()