Example #1
def launch():
    agent = Heuristic_Agents()

    env = Env(0, 1)
    task_dist = Task_Dist()
    workloads = task_dist.gen_seq_workload()

    for ex in range(agent.params.num_ex):
        env.reset()
        env.workload_seq = workloads[ex]
        for i in range(len(workloads[ex])):
            env.generate_workload()
            env.seq_id += 1
        print('Testing : ', env.workload_seq)

        ob = env.observe()
        rews = []
        acts = []
        finished_episode_len = 0  # in case no allocation ever succeeds
        for _ in range(agent.params.episode_max_length):
            a = agent.get_action(env)
            acts.append(a)  # record the action so the summary below has data
            plt1 = visualize_state(ob, agent.params,
                                   '/tmp/trajs/episode_%d' % int(_))
            ob, rews, done, status = env.step(a, _, rews)
            if status == 'Allocation_Success':
                finished_episode_len = _ + 1
            if done:
                break
        print('Test Actions: ', acts[:finished_episode_len])
        print('Reward : ', rews)
        print('Reward : ', sum(rews))
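
A minimal sketch of how this entry point might be driven, assuming the project-local names used above (Heuristic_Agents, Env, Task_Dist, visualize_state) are importable; the module paths below are assumptions, not confirmed by the source:

# hypothetical wiring -- module paths are assumptions
from heuristic_agents import Heuristic_Agents   # assumed module path
from environment import Env, Task_Dist          # assumed module path
from plotting import visualize_state            # assumed module path

if __name__ == '__main__':
    launch()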
Example #2
    def __init__(self, cur_time, time_step):
        self.params = Parameters()
        self.cur_time = cur_time
        self.time_step = time_step
        self.machines = []
        self.waiting_tasks = []
        #initialize machines
        for i in range(self.params.num_machines):
            self.machines.append(Machine(i, self.params.machine_res_cap))

        self.task_dist = Task_Dist()
        self.workload_seq = None
        self.seq_id = 0
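
In this snippet Env only reads params.num_machines and params.machine_res_cap; a minimal stand-in Parameters for local experimentation could look like the sketch below (values purely illustrative, not the project's real configuration):

class Parameters:
    """Hypothetical stub; the real class lives in the project's params module."""
    def __init__(self):
        self.num_machines = 10       # illustrative machine count
        self.machine_res_cap = 20    # illustrative per-machine resource capacity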
Example #3
def main():
    import os
    import sys
    import getopt
    import random
    import threading
    import tensorflow as tf
    import numpy as np
    from time import time, sleep
    #from agent import Agent

    import params
    task_dist = Task_Dist()
    workloads = task_dist.gen_seq_workload()

    pa = params.Params()
    env = Env(0, 1)
    #	env.workload_seq = workloads[0]

    # Train for this many steps
    #	T_MAX = 100000000
    # Use this many threads
    #NUM_THREADS = 8
    # Initial learning rate for Adam
    #	INITIAL_LEARNING_RATE = 1e-4
    # The discount factor
    #	DISCOUNT_FACTOR = 0.99
    # Evaluate the agent and print out average reward every this many steps
    #	VERBOSE_EVERY = 40000
    # Update the parameters in each thread after this many steps in that thread
    #I_ASYNC_UPDATE = 5
    # Use this global variable to exit the training loop in each thread once we've finished.
    training_finished = False
    tf.reset_default_graph()
    sess = tf.Session()

    agent = Agent(sess, pa)
    #	a3c(pa)
    sess.run(tf.global_variables_initializer())
    # create a TensorFlow saver object and keep up to 50000 checkpoints
    saver = tf.train.Saver(max_to_keep=50000)
    async_trainer(agent, env, pa, sess, saver, workloads)
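
Note that tf.reset_default_graph, tf.Session, and tf.train.Saver are TensorFlow 1.x APIs. Under TensorFlow 2.x the same graph-mode setup can still be reproduced through the compat.v1 shim, roughly as follows:

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()                    # fall back to TF1 graph-mode semantics
tf.reset_default_graph()
w = tf.get_variable('w', shape=[1])         # stand-in for the agent's variables
sess = tf.Session()
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver(max_to_keep=50000)   # keep up to 50000 checkpoints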
Example #4
class Env1:
    def __init__(self, cur_time, time_step):
        self.params = Parameters()
        self.cur_time = cur_time
        self.time_step = time_step
        self.machines = []
        self.waiting_tasks = []
        #initialize machines
        for i in range(self.params.num_machines):
            self.machines.append(Machine(i, self.params.machine_res_cap))

        self.task_dist = Task_Dist()
        self.workload_seq = None
        self.seq_id = 0

    #Generate workload and populate self.waiting_tasks after each interval
    def generate_workload(self):
        if len(self.workload_seq) <= self.seq_id:
            return

        # print('Incoming Tasks: ', self.seq_id, self.workload_seq[self.seq_id])
        if self.workload_seq[self.seq_id]:
            for task_type in self.workload_seq[self.seq_id]:
                task_color, task_cpu_limit, task_finish_time = self.task_dist.get_task_details(
                    task_type)

                max_color = task_color
                for tsk in self.waiting_tasks:
                    if task_type == tsk.service:
                        if max_color <= tsk.color:
                            max_color = tsk.color
                for mcn in self.machines:
                    for tsk in mcn.running_tasks:
                        if task_type == tsk.service and max_color <= tsk.color:
                            max_color = tsk.color
                task_color = max_color + 0.01

                self.waiting_tasks.append(
                    Task(task_type, task_color, task_cpu_limit,
                         task_finish_time, self.cur_time))

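    #return the state of the environment as a 2D matrix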
    def observe(self):
        img_repr = np.zeros((self.params.state_len, self.params.state_width))
        #add machines
        used_width = 0
        for res in range(self.params.num_res):
            for machine in self.machines:
                img_repr[:, used_width:used_width +
                         self.params.machine_res_cap] = machine.canvas[
                             res, :, :]
                used_width += self.params.machine_res_cap
        #add backlog queue
        if len(self.waiting_tasks) > 0:
            t = 0
            for i in range(self.params.state_len):
                for j in range(self.params.backlog_width):
                    img_repr[i, used_width + j] = self.waiting_tasks[t].color
                    t += 1
                    if (t == len(self.waiting_tasks)):
                        break
                if (t == len(self.waiting_tasks)):
                    break

        used_width += self.params.backlog_width
        assert used_width == self.params.state_width

        return img_repr

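    #changes the state of the system by taking the action; returns the new state, rewards, done flag and status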
    def step(self, action, episode_time, rewards, c, log=False):
        status = None
        done = False
        reward = 0

        if len(self.waiting_tasks) == 0:
            status = 'Backlog_Empty'

        elif action == self.params.num_machines:
            status = 'Invalid'
        else:
            allocated = self.machines[action].allocate_task(
                self.waiting_tasks[c], episode_time)
            if allocated:
                status = 'Allocation_Success'
                self.waiting_tasks[c].start_time = self.cur_time
                self.waiting_tasks = self.waiting_tasks[
                    0:c] + self.waiting_tasks[c + 1:]
            else:
                status = 'Allocation_Failed'

        if status in ('Allocation_Success', 'Invalid', 'Allocation_Failed', 'Backlog_Empty'):
            self.seq_id += 1
            # self.generate_workload()
            self.update()
            #TODO fix max no of jobs, so when all jobs complete episode ends
            unfinished = 0
            for machine in self.machines:
                if len(machine.running_tasks) != 0:
                    unfinished += 1
            if unfinished == 0 and len(self.waiting_tasks) == 0:
                done = True
            if self.cur_time > self.params.episode_max_length:  # run too long, force termination
                done = True
            rewards = self.get_reward(rewards, log)
            self.generate_workload()
            # if status == 'Allocation_Failed':
            # 	reward += -100
            # if status == 'No_More_Jobs' and action < self.params.num_machines:
            # 	reward += -100
            # if len(self.waiting_tasks) != 0 and action == self.params.num_machines:
            # 	reward += -100

        ob = self.observe()
        if done:
            self.reset()
        return ob, rewards, done, status

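    #reset the system by making all machines empty and setting time back to 0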
    def reset(self):
        self.cur_time = 0
        self.machines = []
        self.waiting_tasks = []
        for i in range(self.params.num_machines):
            self.machines.append(Machine(i, self.params.machine_res_cap))
        self.seq_id = 0
        #self.workload_seq = self.task_dist.gen_seq_workload()

    def get_suitable_machines(self, task):
        return [
            machine for machine in self.machines
            if machine.cpus_left > task.cpu_limit
        ]

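    #update time and the status of running tasks in every machine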
    def update(self):
        self.cur_time += self.time_step
        for machine in self.machines:
            machine.update(self.task_dist, self.cur_time)

    def schedule(self):
        unscheduled_tasks = []
        for task in self.waiting_tasks:
            suitable_machines = self.get_suitable_machines(task)
            if not suitable_machines:
                unscheduled_tasks.append(task)
                continue

            machine = self.find_best_machine(task, suitable_machines)
            if machine is not None:
                task.start_time = self.cur_time
                machine[0].allocate_task(task)
            else:
                unscheduled_tasks.append(task)
        self.waiting_tasks = unscheduled_tasks

    def find_best_machine(self, task, suitable_machines):
        if np.random.randint(2):
            return np.random.choice(suitable_machines,
                                    1,
                                    p=[1 / len(suitable_machines)] *
                                    len(suitable_machines))
        else:
            return None

    def get_reward(self, rewards, log):
        rewards.append(0)
        #Penalty for putting a task on hold
        rewards[-1] += self.params.hold_penalty * len(self.waiting_tasks) / (
            self.params.state_len * self.params.state_width
        )  #TODO add some penalty factor
        # print('Hold : ', self.params.hold_penalty * len(self.waiting_tasks))
        #Penalty using cross-correlation of cpu_util
        for i, machine in enumerate(self.machines):
            # print('cpus left:', machine.cpus_left)
            if len(machine.running_tasks) == 1 and len(
                    machine.running_tasks[0].cpu_util) == 1:
                # print(i+1, 'New machine allocated')
                rewards[-1] += self.params.machine_used_penalty / (
                    self.params.state_len * self.params.state_width)
            if machine.cpus_left < 0 and not machine.running_tasks[
                    -1].already_overshoot:
                if log:
                    print(i + 1, 'Overshoot', self.params.overshoot_penalty)
                machine.running_tasks[-1].already_overshoot = True
                rewards[machine.running_tasks[-1].
                        episode_time] += self.params.overshoot_penalty / (
                            self.params.state_len * self.params.state_width)
            for task in machine.running_tasks:
                for tsk in task.conf_at_scheduling:
                    if tsk in machine.running_tasks:
                        rewards[
                            task.
                            episode_time] += self.params.interference_penalty * (
                                task.cpu_util[-1] *
                                tsk.cpu_util[-1]) / (self.params.state_len *
                                                     self.params.state_width)
                        # print('Other : ', self.params.interference_penalty * (task.cpu_util[-1] * tsk.cpu_util[-1]))
            # for i in range(len(machine.running_tasks)):
            # 	for j in range(i+1, len(machine.running_tasks)):
            # 		task_i, task_j = machine.running_tasks[i], machine.running_tasks[j]
            # 		# if task_i != task_j and len(task_i.cpu_util) > self.params.hist_wind_len and len(task_j.cpu_util) > self.params.hist_wind_len:
            # 		# 	reward += self.params.interference_penalty * (np.correlate(task_i.cpu_util[-self.params.hist_wind_len:], task_j.cpu_util[-self.params.hist_wind_len:]))
            # 		if task_i != task_j:
            # 			# m = min(self.params.hist_wind_len, min(len(task_i.cpu_util), len(task_j.cpu_util)))
            # 			# reward += self.params.interference_penalty * (np.correlate(task_i.cpu_util[-m:], task_j.cpu_util[-m:]))
            # 			reward += self.params.interference_penalty * (task_i.cpu_util[-1] * task_j.cpu_util[-1])
        return rewards
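
For reference, the interference term above multiplies the latest CPU utilizations of two co-located tasks, scales by interference_penalty, and normalizes by the state area; a standalone sketch of the arithmetic (numbers purely illustrative):

# illustrative numbers only -- real values come from Parameters and task traces
interference_penalty = -1.0
state_area = 20 * 124          # assumed state_len * state_width
u_task, u_other = 0.6, 0.5     # latest cpu_util samples of two co-located tasks
penalty = interference_penalty * (u_task * u_other) / state_area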
Example #5
def main():
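	# assumes module-level imports: numpy as np, os, and the project's Heuristic_Agents, Env, Task_Dist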
	agent = Heuristic_Agents()

	env = Env(0, 1)
	task_dist = Task_Dist()
	delay=0
	workloads = task_dist.gen_seq_workload()
	logs = open('/home/dell/logs_cpu_mem_tetris_23ex', 'a')
	no_machines=[]
	logline = str(0) + '\n'
	ex_indices=[]
	for ex in range(agent.params.num_ex):
		env.reset()
		env.workload_seq = workloads[ex]
	#	for i in range(len(workloads[ex+agent.params.num_ex])):
		env.generate_workload()
#		env.seq_id += 1
		print('Testing : ', env.workload_seq)

		ob = env.observe()
		rews = []
		acts = []
		finished_episode_len = 0  # only set below on 'Allocation_Success'
		cpu_crs = [0] * agent.params.num_machines
		cpu_crs_max = [0] * agent.params.num_machines
		mem_crs = [0] * agent.params.num_machines
		mem_crs_max = [0] * agent.params.num_machines
		c_utils = ''
		m_utils = ''
		suffer  = []
		np.random.seed(20)
		for _ in range(agent.params.episode_max_length):
			a, task, task_id = agent.get_action(env)
			acts.append(a)
			#if ex==0:
			#	plt1 = visualize_state(ob, agent.params, '/home/dell/trajs/worst_fit20/episode_%d' % int(_))
			ob, rews, done, status = env.step(a, _, rews, task_id)
			if status == 'Allocation_Success':
				finished_episode_len = _ + 1
			#	print('Example>>>',ex)
			#	print('Service Name>>',task.service)
			#	print('Entry time>>',task.enter_time)
				delay+=task.start_time-task.enter_time
			#	print('Scheduling Time>>',task.start_time)
			#	print('Delay>>>>',task.start_time - task.enter_time)
			if done:
				break

			c_util = ''
			m_util = ''
			for k, machine in enumerate(env.machines):
				if len(machine.running_tasks) > 0:
					if machine.cpus_left >= 0:
						c_util+=str(machine.total_cpus - machine.cpus_left) +','
					else:
						c_util+=str(machine.total_cpus+abs(machine.cpus_left)) +','
						suffer.append(abs(machine.cpus_left))
				else:
					c_util += str(0) + ','
			for k, machine in enumerate(env.machines):
				if len(machine.running_tasks) > 0:
					if machine.mems_left >= 0:
						m_util+=str(machine.total_mems - machine.mems_left) +','
					else:
						m_util+=str(machine.total_mems+abs(machine.mems_left)) +','
						suffer.append(abs(machine.mems_left))
				else:
					m_util += str(0) + ','

				cpu_crs_this_time = [0]*agent.params.num_machines
				mem_crs_this_time = [0]*agent.params.num_machines

				for i in range(len(machine.running_tasks)):
					for j in range(i+1, len(machine.running_tasks)):
						task_i, task_j = machine.running_tasks[i], machine.running_tasks[j]
						if task_i != task_j and len(task_i.cpu_util)>0 and len(task_j.cpu_util)>0:
							cpu_crs[k] += agent.params.interference_penalty_cpu * (task_i.cpu_util[-1] * task_j.cpu_util[-1]) * (-1)
							cpu_crs_this_time[k] += agent.params.interference_penalty_cpu * (task_i.cpu_util[-1] * task_j.cpu_util[-1]) * (-1)
						if task_i != task_j and len(task_i.mem_util)>0 and len(task_j.mem_util)>0:
							mem_crs[k] += agent.params.interference_penalty_mem * (task_i.mem_util[-1] * task_j.mem_util[-1]) * (-1)
							mem_crs_this_time[k] += agent.params.interference_penalty_mem * (task_i.mem_util[-1] * task_j.mem_util[-1]) * (-1)
				cpu_crs_max[k] = max(cpu_crs_max[k], cpu_crs_this_time[k])
				mem_crs_max[k] = max(mem_crs_max[k], mem_crs_this_time[k])
				#################
			c_utils += c_util + '|'
			m_utils += m_util + '|'
	
		logline += str(str(_ -1)+'|'+str(c_utils) + str(finished_episode_len)) + '\n' + str(sum(rews)) + '\n' + str(sum(suffer))  +'\n'
		logline +=str(m_utils) +'\n'
		for i in range(len(env.machines)):
			logline += str(cpu_crs[i]) + ','
		logline = logline[:-1] + '\n'
		for i in range(len(env.machines)):
			logline += str(cpu_crs_max[i]) + ','
		logline = logline[:-1]
		logline += '\n'

		for i in range(len(env.machines)):
			logline += str(mem_crs[i]) + ','
		logline = logline[:-1] + '\n'
		for i in range(len(env.machines)):
			logline += str(mem_crs_max[i]) + ','
		logline = logline[:-1]
		logline += '\n'
		print('Test Actions: ', acts[:finished_episode_len])
		print('Reward : ', rews)
	
		print('Reward : ', sum(rews))
		machines_used = len(set(acts[:finished_episode_len])) - 1
		print('Number of machines used>>', machines_used)

		no_machines.append(machines_used)
		if machines_used == 8 or machines_used == 9:
			ex_indices.append(ex)


	print('Average Delay>>>', delay / agent.params.num_ex)  # averaged over the num_ex examples run above
	print('Number of examples with 6 machines>>>',no_machines.count(6))
	print('Number of examples with 7 machines>>>',no_machines.count(7))
	print('Number of examples with 8 machines>>>',no_machines.count(8))
	print('Number of examples with 9 machines>>>',no_machines.count(9))
	print('Number of examples with 10 machines>>>',no_machines.count(10))
	logs.write(logline)
	logs.flush()
	# import pickle
	# with open("workloads8_9_threecombo_68_random_indices.txt", "wb") as fp:   #Pickling
	# 	pickle.dump(ex_indices, fp)
	os.fsync(logs.fileno())
Example #6
def launch(pa, pg_resume=None, render=False, repre='image', end='no_new_job'):
    task_dist = Task_Dist()
    workloads = task_dist.gen_seq_workload()

    # ----------------------------
    print("Preparing for workers...")
    # ----------------------------
    #logs = open('/home/shanka/logs_packing_deeprm', 'a')
    pg_learners = []
    envs = []
    job_distribution = Dist()
    nw_len_seqs, nw_size_seqs = job_distribution.generate_sequence_work(
        pa, seed=42)

    for ex in range(pa.num_ex):

        print("-prepare for env-", ex)

        env = environment.Env(pa,
                              nw_len_seqs=nw_len_seqs,
                              nw_size_seqs=nw_size_seqs,
                              render=False,
                              repre=repre,
                              end=end)
        env.seq_no = ex
        envs.append(env)

    for ex in range(pa.batch_size +
                    1):  # last worker for updating the parameters

        print("-prepare for worker-", ex)

        pg_learner = pg_network.PGLearner(pa)

        if pg_resume is not None:
            net_handle = open(pg_resume, 'rb')
            net_params = pickle.load(net_handle)
            pg_learner.set_net_params(net_params)

        pg_learners.append(pg_learner)

    accums = init_accums(pg_learners[pa.batch_size])

    # --------------------------------------
    # print("Preparing for reference data...")
    # --------------------------------------
    # print('Start testing...')

    # for ite in range(10,1000,10):
    # 	pg_resume = pa.output_filename +'_'+str(ite)+'.pkl'

    # 	logline=test(ite,pa, pg_resume,workloads,repre)

    # 	logs.write(logline)
    # 	logs.flush()

    # 	os.fsync(logs.fileno())
    # return

    # ref_discount_rews, ref_slow_down = slow_down_cdf.launch(pa, pg_resume=None, render=False, plot=False, repre=repre, end=end)
    # mean_rew_lr_curve = []
    # max_rew_lr_curve = []
    # slow_down_lr_curve = []

    # --------------------------------------
    print("Start training...")
    # --------------------------------------

    timer_start = time.time()

    for iteration in range(1, pa.num_epochs):

        ps = []  # threads
        manager = Manager()  # managing return results
        manager_result = manager.list([])

        ex_indices = list(range(pa.num_ex))  # list, so the shuffle below works if re-enabled
        #    np.random.shuffle(ex_indices)

        all_eprews = []
        grads_all = []
        loss_all = []
        eprews = []
        eplens = []
        all_slowdown = []
        all_entropy = []

        ex_counter = 0
        for ex in range(pa.num_ex):

            ex_idx = ex_indices[ex]
            p = Process(target=get_traj_worker,
                        args=(
                            pg_learners[ex_counter],
                            envs[ex_idx],
                            pa,
                            manager_result,
                        ))
            ps.append(p)

            ex_counter += 1

            if ex_counter >= pa.batch_size or ex == pa.num_ex - 1:

                print(ex, "out of", pa.num_ex)

                ex_counter = 0

                for p in ps:
                    p.start()

                for p in ps:
                    p.join()

                result = []  # convert list from shared memory
                for r in manager_result:
                    result.append(r)

                ps = []
                manager_result = manager.list([])

                all_ob = concatenate_all_ob_across_examples(
                    [r["all_ob"] for r in result], pa)
                all_action = np.concatenate([r["all_action"] for r in result])
                all_adv = np.concatenate([r["all_adv"] for r in result])

                # Do policy gradient update step, using the first agent
                # put the new parameter in the last 'worker', then propagate the update at the end
                grads = pg_learners[pa.batch_size].get_grad(
                    all_ob, all_action, all_adv)

                grads_all.append(grads)

                all_eprews.extend([r["all_eprews"] for r in result])

                eprews.extend(np.concatenate([r["all_eprews"] for r in result
                                              ]))  # episode total rewards
                eplens.extend(np.concatenate([r["all_eplens"] for r in result
                                              ]))  # episode lengths

        #        all_slowdown.extend(np.concatenate([r["all_slowdown"] for r in result]))
        #        all_entropy.extend(np.concatenate([r["all_entropy"] for r in result]))

        # assemble gradients
        grads = grads_all[0]
        for i in range(1, len(grads_all)):
            for j in range(len(grads)):
                grads[j] += grads_all[i][j]

        # propagate network parameters to others
        params = pg_learners[pa.batch_size].get_params()

        rmsprop_updates_outside(grads, params, accums, pa.lr_rate, pa.rms_rho,
                                pa.rms_eps)

        for i in range(pa.batch_size + 1):
            pg_learners[i].set_net_params(params)

        timer_end = time.time()

        print("-----------------")
        print("Iteration: \t %i" % iteration)
        print("NumTrajs: \t %i" % len(eprews))
        print("NumTimesteps: \t %i" % np.sum(eplens))
        #  print "Loss:     \t %s" % np.mean(loss_all)
        print("MaxRew: \t %s" % np.average([np.max(rew)
                                            for rew in all_eprews]))
        print("MeanRew: \t %s +- %s" % (np.mean(eprews), np.std(eprews)))
        # print "MeanSlowdown: \t %s" % np.mean(all_slowdown)
        print("MeanLen: \t %s +- %s" % (np.mean(eplens), np.std(eplens)))
        #     print "MeanEntropy \t %s" % (np.mean(all_entropy))
        print("Elapsed time\t %s" % (timer_end - timer_start), "seconds")
        print("-----------------")

        #    max_rew_lr_curve.append(np.average([np.max(rew) for rew in all_eprews]))
        #    mean_rew_lr_curve.append(np.mean(eprews))
        #    slow_down_lr_curve.append(np.mean(all_slowdown))

        if iteration % pa.output_freq == 0:
            pg_resume = pa.output_filename + '_' + str(iteration) + '.pkl'
            param_file = open(pg_resume, 'wb')
            pickle.dump(pg_learners[pa.batch_size].get_params(), param_file,
                        -1)  # dump the updater's params, not the last constructed learner
            param_file.close()
            test(pa, pg_resume, workloads, repre)
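
The parameter update above sums gradients across batches and applies an RMSProp step outside the learners. A plausible shape for rmsprop_updates_outside, matching its (grads, params, accums, lr, rho, eps) call signature, is sketched below; this is an inferred helper, not the project's confirmed implementation, and the update sign depends on how the gradients are defined:

import numpy as np

def rmsprop_updates_outside(grads, params, accums, lr, rho, eps):
    # accums[i] keeps a running average of squared gradients for params[i]
    for i in range(len(grads)):
        accums[i] = rho * accums[i] + (1.0 - rho) * grads[i] ** 2
        params[i] += lr * grads[i] / np.sqrt(accums[i] + eps)  # ascent if grads point uphill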
Example #7
def test(pa):
    def ex_test(pg_learner, env, pa, result):
        env.reset()
        env.generate_workload()
        ob = env.observe()
        acts = []
        probs = []
        crs = [0] * pa.num_machines
        crs_max = [0] * pa.num_machines
        rews = []
        utils = ''
        suffer = []
        finished_episode_len = 0
        logline = ''
        for _ in range(pa.episode_max_length):
            a = pg_learner.get_action(env)
            ob, rews, done, status = env.step(a, _, rews)
            acts.append(a)
            if status == 'Allocation_Success':
                finished_episode_len = _ + 1
            if done:
                break
            ##############logs
            util = ''
            for k, machine in enumerate(env.machines):
                if len(machine.running_tasks) > 0:
                    if machine.cpus_left >= 0:
                        util += str(machine.total_cpus -
                                    machine.ac_cpus_left) + ','
                    else:
                        util += str(machine.total_cpus) + ','
                    if machine.ac_cpus_left < 0:
                        suffer.append(abs(machine.ac_cpus_left))
                else:
                    util += str(0) + ','
                crs_this_time = [0] * pa.num_machines
                for i in range(len(machine.running_tasks)):
                    for j in range(i + 1, len(machine.running_tasks)):
                        task_i, task_j = machine.running_tasks[
                            i], machine.running_tasks[j]
                        if task_i != task_j:
                            crs[k] += pa.interference_penalty * (
                                task_i.cpu_util[-1] *
                                task_j.cpu_util[-1]) * (-1)
                            crs_this_time[k] += pa.interference_penalty * (
                                task_i.cpu_util[-1] *
                                task_j.cpu_util[-1]) * (-1)
                crs_max[k] = max(crs_max[k], crs_this_time[k])
                #################
            utils += util + '|'
        logline += str(
            str(_ - 1) + '|' + str(utils) +
            str(finished_episode_len)) + '\n' + str(sum(rews)) + '\n' + str(
                sum(suffer)) + '\n'
        for i in range(len(env.machines)):
            logline += str(crs[i]) + ','
        logline = logline[:-1] + '\n'
        for i in range(len(env.machines)):
            logline += str(crs_max[i]) + ','
        logline = logline[:-1]
        logline += '\n'

        result.append(logline)

    pg_learners = []
    envs = []
    task_dist = Task_Dist()
    workloads = task_dist.gen_seq_workload()
    for ex in range(pa.num_test_ex):
        print("-prepare for env-", ex)
        env = Env(0, 1)
        env.workload_seq = workloads[ex + pa.num_ex]
        envs.append(env)

    for ex in range(pa.batch_size):  # one heuristic test worker per batch slot
        print("-prepare for worker-", ex)
        pg_learner = Heuristic_Agents()
        pg_learners.append(pg_learner)

    logs = open('/tmp/logs3', 'a')
    loglines = ''
    for it in range(2, pa.num_epochs + 1, 2):
        if it % 10 == 0:
            print('Iteration : ', it)

        ps = []  # threads
        manager = Manager()  # managing return results
        manager_result = manager.list([])
        ex_counter = 0
        loglines += str(it) + '\n'
        for ex in range(pa.num_test_ex):
            p = Process(target=ex_test,
                        args=(
                            pg_learners[ex_counter],
                            envs[ex],
                            pa,
                            manager_result,
                        ))
            ps.append(p)

            ex_counter += 1

            if ex_counter >= pa.batch_size or ex == pa.num_test_ex - 1:
                ex_counter = 0
                for p in ps:
                    p.start()

                for p in ps:
                    p.join()

                # convert list from shared memory
                for r in manager_result:
                    loglines += r

                ps = []
                manager_result = manager.list([])
    logs.write(loglines)
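
The fan-out pattern used here (and in Examples #6, #9, and #10) starts one Process per example, shares results through a Manager list, and drains the list after join; stripped to its essentials it looks like this:

from multiprocessing import Process, Manager

def worker(x, result):
    result.append(x * x)   # stands in for ex_test / get_traj_worker

if __name__ == '__main__':
    manager = Manager()
    result = manager.list([])
    ps = [Process(target=worker, args=(x, result)) for x in range(4)]
    for p in ps:
        p.start()
    for p in ps:
        p.join()
    print(list(result))    # copy out of shared memory before use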
Example #8
class Env:
    def __init__(self, cur_time, time_step):
        self.params = Params()
        self.cur_time = cur_time
        self.time_step = time_step
        self.machines = []
        self.waiting_tasks = []
        #initialize machines
        for i in range(self.params.num_machines):
            self.machines.append(
                Machine(i, self.params.machine_res_cap[0],
                        self.params.machine_res_cap[1]))

        self.task_dist = Task_Dist()
        self.workload_seq = None
        self.seq_id = 0

    #Generate workload and populate self.waiting_tasks after each interval
    def generate_workload(self):
        if len(self.workload_seq) <= self.seq_id:
            return

        # print('Incoming Tasks: ', self.seq_id, self.workload_seq[self.seq_id])
        if self.workload_seq[self.seq_id]:
            for task_type in self.workload_seq[self.seq_id]:
                task_color, task_cpu_limit, task_mem_limit, task_finish_time = self.task_dist.get_task_details(
                    task_type)

                max_color = task_color
                for tsk in self.waiting_tasks:
                    if task_type == tsk.service:
                        if max_color <= tsk.color:
                            max_color = tsk.color
                for mcn in self.machines:
                    for tsk in mcn.running_tasks:
                        if task_type == tsk.service and max_color <= tsk.color:
                            max_color = tsk.color
                task_color = max_color + 0.01

                self.waiting_tasks.append(
                    Task(task_type, task_color, task_cpu_limit, task_mem_limit,
                         task_finish_time, self.cur_time))

    #return the state of environment as 2D-matrix

    def observe(self):
        img_repr = np.zeros((self.params.state_len, self.params.state_width))
        #add machines
        used_width = 0
        for res in range(self.params.num_res):
            for machine in self.machines:
                #	print(res,machine.canvas[res][0,:,:].shape)
                if res == 0:
                    img_repr[:, used_width:used_width + self.params.
                             machine_res_cap[res]] = machine.canvas1[0, :, :]
                else:
                    img_repr[:, used_width:used_width + self.params.
                             machine_res_cap[res]] = machine.canvas2[0, :, :]
            #	print('image',img_repr)
                used_width += self.params.machine_res_cap[res] + 1
        #add backlog queue
        if len(self.waiting_tasks) > 0:
            t = 0
            for i in range(self.params.state_len):
                for j in range(self.params.backlog_width):
                    img_repr[i, used_width + j] = self.waiting_tasks[t].color
                    t += 1
                    if (t == len(self.waiting_tasks)):
                        break
                if (t == len(self.waiting_tasks)):
                    break

        used_width += self.params.backlog_width
        assert used_width == self.params.state_width

        k = -1
        for res in range(self.params.num_res):
            for machine in self.machines:
                k += self.params.machine_res_cap[res] + 1
                j = 0

                for m in machine.running_tasks:
                    img_repr[j, k] = m.color + machine.mid
                    j += 1

        return img_repr

    #changes the state of system by taking the action and returns the rewards, new state due to that action
    def step(self, action, episode_time, rewards, task_no):
        status = None
        done = False
        reward = 0

        if len(self.waiting_tasks) == 0:
            status = 'Backlog_Empty'
        elif action == self.params.num_machines:
            status = 'Invalid'
        else:
            allocated = self.machines[action].allocate_task(
                self.waiting_tasks[0], episode_time)
            if allocated:
                status = 'Allocation_Success'
                #	print('Current Time>>',self.cur_time)
                self.waiting_tasks[task_no].start_time = self.cur_time
                self.waiting_tasks = self.waiting_tasks[
                    0:task_no] + self.waiting_tasks[task_no + 1:]
            else:
                status = 'Allocation_Failed'

        if (status == 'Invalid') or (status == 'Allocation_Failed') or (
                status == 'Backlog_Empty'):
            self.seq_id += 1
            # self.generate_workload()
            self.update()
            #TODO fix max no of jobs, so when all jobs complete episode ends
            unfinished = 0
            for machine in self.machines:
                if len(machine.running_tasks) != 0:
                    unfinished += 1
            if unfinished == 0 and len(self.waiting_tasks) == 0:
                done = True
            if self.cur_time > self.params.episode_max_length:  # run too long, force termination
                done = True
            rewards = self.get_reward(rewards)
            self.generate_workload()
            # if status == 'Allocation_Failed':
            # 	reward += -100
            # if status == 'No_More_Jobs' and action < self.params.num_machines:
            # 	reward += -100
            # if len(self.waiting_tasks) != 0 and action == self.params.num_machines:

            # 	reward += -100

        else:
            unfinished = 0
            for machine in self.machines:
                if len(machine.running_tasks) != 0:
                    unfinished += 1
            if unfinished == 0 and len(self.waiting_tasks) == 0:
                done = True
            if self.cur_time > self.params.episode_max_length:  # run too long, force termination
                done = True
            rewards = self.get_reward(rewards)

        ob = self.observe()
        if done:
            self.reset()

        return ob, rewards, done, status

    #reset the system by making all machines empty and time to 0
    def reset(self):
        self.cur_time = 0
        self.machines = []
        self.waiting_tasks = []
        for i in range(self.params.num_machines):
            self.machines.append(
                Machine(i, self.params.machine_res_cap[0],
                        self.params.machine_res_cap[1]))
        self.seq_id = 0

    #update time, status of running tasks in every machine
    def update(self):
        self.cur_time += self.time_step
        for machine in self.machines:
            machine.update(self.task_dist, self.cur_time)

    def get_reward(self, rewards):
        rewards.append(0)

        #Penalty for putting a task on hold
        rewards[-1] += self.params.hold_penalty * len(
            self.waiting_tasks)  #TODO add some penalty factor
        # print('Hold : ', self.params.hold_penalty * len(self.waiting_tasks))
        #Penalty using cross-correlation of cpu_util
        for i, machine in enumerate(self.machines):
            tasks = []
            # print('cpus left:', machine.cpus_left)
            if len(machine.running_tasks) > 0 and machine.cpus_left > 0:
                # print(i+1, 'New machine allocated')
                rewards[-1] += (-1) * pow(machine.cpus_left,
                                          self.params.machine_used_penalty)
            if len(machine.running_tasks) > 0 and machine.mems_left > 0:
                # print(i+1, 'New machine allocated')
                rewards[-1] += (-1) * pow(machine.mems_left,
                                          self.params.machine_used_penalty)
        #	if len(machine.running_tasks) == 1 and len(machine.running_tasks[0].cpu_util) == 0:
        #		print(i+1, 'New machine allocated')
        #		rewards[-1] += self.params.machine_used_penalty
            for j, task in enumerate(reversed(machine.running_tasks)):
                tasks.append(task)
                if j == 0:
                    if machine.cpus_left < 0 and not task.already_overshoot_cpu:
                        print(i + 1, 'OvershootA_CPU', abs(machine.cpus_left))
                        task.already_overshoot_cpu = True
                        rewards[
                            task.episode_time] += self.params.overshoot_penalty
                else:
                    util_sum = 0  # renamed from 'sum' to avoid shadowing the builtin
                    for m in tasks[0:j]:
                        if len(m.cpu_util) > 0:
                            util_sum += m.cpu_util[-1]
                    if (machine.cpus_left +
                            util_sum) < 0 and not task.already_overshoot_cpu:
                        print(i + 1, 'OvershootB_CPU',
                              abs(machine.cpus_left + util_sum))
                        task.already_overshoot_cpu = True
                        rewards[
                            task.episode_time] += self.params.overshoot_penalty

            tasks = []
            for j, task in enumerate(reversed(machine.running_tasks)):
                tasks.append(task)
                if j == 0:
                    if machine.mems_left < 0 and not task.already_overshoot_mem:
                        print(i + 1, 'OvershootA_MEM', abs(machine.mems_left))
                        task.already_overshoot_mem = True
                        rewards[
                            task.episode_time] += self.params.overshoot_penalty
                else:
                    util_sum = 0  # renamed from 'sum' to avoid shadowing the builtin
                    for m in tasks[0:j]:
                        if len(m.mem_util) > 0:
                            util_sum += m.mem_util[-1]
                    if (machine.mems_left +
                            util_sum) < 0 and not task.already_overshoot_mem:
                        print(i + 1, 'OvershootB_MEM',
                              abs(machine.mems_left + util_sum))
                        task.already_overshoot_mem = True
                        rewards[
                            task.episode_time] += self.params.overshoot_penalty

            for task in machine.running_tasks:
                for tsk in task.conf_at_scheduling:
                    if tsk in machine.running_tasks:
                        if len(task.cpu_util) > 0 and len(tsk.cpu_util) > 0:
                            rewards[
                                task.
                                episode_time] += self.params.interference_penalty_cpu * (
                                    task.cpu_util[-1] * tsk.cpu_util[-1])
                        if len(task.mem_util) > 0 and len(tsk.mem_util) > 0:
                            rewards[
                                task.
                                episode_time] += self.params.interference_penalty_mem * (
                                    task.mem_util[-1] * tsk.mem_util[-1])
                        # print('Other : ', self.params.interference_penalty * (task.cpu_util[-1] * tsk.cpu_util[-1]))
            # for i in range(len(machine.running_tasks)):
            # 	for j in range(i+1, len(machine.running_tasks)):
            # 		task_i, task_j = machine.running_tasks[i], machine.running_tasks[j]
            # 		# if task_i != task_j and len(task_i.cpu_util) > self.params.hist_wind_len and len(task_j.cpu_util) > self.params.hist_wind_len:
            # 		# 	reward += self.params.interference_penalty * (np.correlate(task_i.cpu_util[-self.params.hist_wind_len:], task_j.cpu_util[-self.params.hist_wind_len:]))
            # 		if task_i != task_j:
            # 			# m = min(self.params.hist_wind_len, min(len(task_i.cpu_util), len(task_j.cpu_util)))
            # 			# reward += self.params.interference_penalty * (np.correlate(task_i.cpu_util[-m:], task_j.cpu_util[-m:]))
            # 			reward += self.params.interference_penalty * (task_i.cpu_util[-1] * task_j.cpu_util[-1])
        return rewards
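
Across these examples Env relies on a Machine object with a small, consistent surface; a hypothetical stub of that interface, inferred from the call sites above (not the project's actual class), would be:

class Machine:
    """Hypothetical interface stub inferred from usage; not the real implementation."""
    def __init__(self, mid, total_cpus, total_mems):
        self.mid = mid
        self.total_cpus = total_cpus
        self.total_mems = total_mems
        self.cpus_left = total_cpus     # driven negative on CPU overshoot
        self.mems_left = total_mems     # driven negative on memory overshoot
        self.running_tasks = []         # Task objects currently placed here

    def allocate_task(self, task, episode_time):
        """Try to place the task; return True on success."""
        raise NotImplementedError

    def update(self, task_dist, cur_time):
        """Advance running tasks by one time step, retiring finished ones."""
        raise NotImplementedError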
Example #9
def test2(pa):
	def ex_test(pg_learner, env, pa, result):
		env.reset()
		env.generate_workload()
		ob = env.observe()
		acts = []
		probs = []
		cpu_crs = [0]*pa.num_machines
		cpu_crs_max = [0]*pa.num_machines
		mem_crs = [0]*pa.num_machines
		mem_crs_max = [0]*pa.num_machines
		rews = []
		c_utils = ''
		m_utils= ''
		suffer = []
		finished_episode_len = 0
		logline = ''
		for _ in range(pa.episode_max_length):
			act_prob = pg_learner.get_one_act_prob(ob)
			csprob_n = np.cumsum(act_prob)
			a = np.argmax(act_prob)
			ob, rews, done, status = env.step(a, _, rews)
			acts.append(a)
			probs.append(act_prob)
			if status == 'Allocation_Success':
				finished_episode_len = _ + 1
			if done:
				break
			##############logs
			c_util = ''
			m_util= ''
			for k, machine in enumerate(env.machines):
				if len(machine.running_tasks) > 0:
					if machine.cpus_left >= 0:
						c_util += str(machine.total_cpus - machine.cpus_left) + ','
					else:
						c_util += str(machine.total_cpus) + ','
						suffer.append(abs(machine.cpus_left))
				else:
					c_util += str(0) + ','
			for k, machine in enumerate(env.machines):
				if len(machine.running_tasks) > 0:
					if machine.mems_left >= 0:
						m_util += str(machine.total_mems - machine.mems_left) + ','
					else:
						m_util += str(machine.total_mems) + ','
						suffer.append(abs(machine.mems_left))
				else:
					m_util += str(0) + ','
				cpu_crs_this_time = [0]*pa.num_machines
				for i in range(len(machine.running_tasks)):
					for j in range(i+1, len(machine.running_tasks)):
						task_i, task_j = machine.running_tasks[i], machine.running_tasks[j]
						if task_i != task_j and len(task_i.cpu_util)>0 and len(task_j.cpu_util)>0:
							cpu_crs[k] += pa.interference_penalty_cpu * (task_i.cpu_util[-1] * task_j.cpu_util[-1]) * (-1)
							cpu_crs_this_time[k] += pa.interference_penalty_cpu * (task_i.cpu_util[-1] * task_j.cpu_util[-1]) * (-1)
				cpu_crs_max[k] = max(cpu_crs_max[k], cpu_crs_this_time[k])
				#################
				mem_crs_this_time = [0]*pa.num_machines
				for i in range(len(machine.running_tasks)):
					for j in range(i+1, len(machine.running_tasks)):
						task_i, task_j = machine.running_tasks[i], machine.running_tasks[j]
						if task_i != task_j and len(task_i.mem_util)>0 and len(task_j.mem_util)>0:
							mem_crs[k] += pa.interference_penalty_mem * (task_i.mem_util[-1] * task_j.mem_util[-1]) * (-1)
							mem_crs_this_time[k] += pa.interference_penalty_mem * (task_i.mem_util[-1] * task_j.mem_util[-1]) * (-1)
				mem_crs_max[k] = max(mem_crs_max[k], mem_crs_this_time[k])
				#################
			c_utils += c_util + '|'
			m_utils += m_util + '|'
		logline += str(str(_-1)+'|'+str(c_utils) + str(finished_episode_len)) + '\n' + str(sum(rews)) + '\n' + str(sum(suffer))  +'\n'
		logline+=str(m_utils) +'\n'
		for i in range(len(env.machines)):
			logline += str(cpu_crs[i]) + ','
		logline = logline[:-1] + '\n'
		for i in range(len(env.machines)):
			logline += str(cpu_crs_max[i]) + ','
		logline = logline[:-1]
		logline += '\n'

		for i in range(len(env.machines)):
			logline += str(mem_crs[i]) + ','
		logline = logline[:-1] + '\n'
		for i in range(len(env.machines)):
			logline += str(mem_crs_max[i]) + ','
		logline = logline[:-1]
		logline += '\n'

		result.append(logline)


	pg_learners = []
	envs = []
	task_dist = Task_Dist()
	workloads = task_dist.gen_seq_workload()
	for ex in range(pa.num_ex):
		print("-prepare for env-", ex)
		env = Env(0, 1)
		env.workload_seq = workloads[ex]
		envs.append(env)

	for ex in range(pa.batch_size):  # last worker for updating the parameters
		print("-prepare for worker-", ex)
		pg_learner = policy_network.PGLearner(pa)
		pg_learners.append(pg_learner)

	logs = open('/home/dell/logs_cpu_mem_aischedule', 'a')
	loglines = ''
	for it in range(1300, 1320, 20):
		if it % 10 == 0:
			print('Iteration : ', it)
		pg_resume = '/home/dell/testing_part2/' + str(it) + '.pkl_'
		net_handle = open(pg_resume, 'rb')
		net_params = pickle.load(net_handle)
		for ex in range(pa.batch_size):
			pg_learners[ex].set_net_params(net_params)

		ps = []  # threads
		manager = Manager()  # managing return results
		manager_result = manager.list([])
		ex_counter = 0
		loglines += str(it) + '\n'
		for ex in range(pa.num_ex):
			p = Process(target=ex_test,
						args=(pg_learners[ex_counter], envs[ex], pa, manager_result, ))
			ps.append(p)

			ex_counter += 1

			if ex_counter >= pa.batch_size or ex == pa.num_ex - 1:  # loop above runs over pa.num_ex
				ex_counter = 0
				for p in ps:
					p.start()

				for p in ps:
					p.join()

				# convert list from shared memory
				for r in manager_result:
					loglines += r 

				ps = []
				manager_result = manager.list([])
	logs.write(loglines)
Example #10
def launch(pa, pg_resume=None, save_freq=50, render=False, repre='image', end='no_new_job', test_only=False):

	task_dist = Task_Dist()
	workloads = task_dist.gen_seq_workload()
	if test_only:
		test(0, pa, pg_resume, workloads)
		return

	pg_learners = []
	envs = []

	for ex in range(pa.num_ex):
		print("-prepare for env-", ex)
		env = Env(0, 1)
		env.workload_seq = workloads[ex]
		envs.append(env)

	for ex in range(pa.batch_size + 1):  # last worker for updating the parameters
		print("-prepare for worker-", ex)
		pg_learner = policy_network.PGLearner(pa)

		if pg_resume is not None:
			net_handle = open(pg_resume, 'rb')
			net_params = pickle.load(net_handle)
			pg_learner.set_net_params(net_params)

		pg_learners.append(pg_learner)

	accums = init_accums(pg_learners[pa.batch_size])

	print('Preparing for Training from Scratch...')

	#   ref_discount_rews=slow_down_cdf.launch(pa,pg_resume=None,render=False,repre=repre,end=end)
	all_test_rews = []
	timer_start = time.time()


	logs = open('/tmp/logs', 'a')
	loglines = ''
	for iteration in range(1, pa.num_epochs+1):
		ps = []  # threads
		manager = Manager()  # managing return results
		manager_result = manager.list([])

		ex_indices = list(range(pa.num_ex))
	#	np.random.shuffle(ex_indices)

		all_ob=[]
		all_action=[]
		grads_all = []
		eprews = []
		eplens = []
		all_adv=[]
		all_eprews=[]
		all_eplens=[]

		ex_counter = 0
		for ex in range(pa.num_ex):
			ex_idx = ex_indices[ex]
			p = Process(target=get_traj_worker,
						args=(pg_learners[ex_counter], envs[ex_idx], pa, manager_result, ))
			ps.append(p)

			ex_counter += 1

			if ex_counter >= pa.batch_size or ex == pa.num_ex - 1:

				print(ex+1, "out of", pa.num_ex)

				ex_counter = 0

				for p in ps:
					p.start()

				for p in ps:
					p.join()

				result = []  # convert list from shared memory
				for r in manager_result:
					result.append(r)

				ps = []
				manager_result = manager.list([])

				all_ob = concatenate_all_ob_across_examples([r["all_ob"] for r in result], pa)
				all_action = np.concatenate([r["all_action"] for r in result])
				all_adv = np.concatenate([r["all_adv"] for r in result])

				# Do policy gradient update step, using the first agent
				# put the new parameter in the last 'worker', then propagate the update at the end
				grads = pg_learners[pa.batch_size].get_grad(all_ob, all_action, all_adv)

				grads_all.append(grads)

				all_eprews.extend([r["all_eprews"] for r in result])

				eprews.extend(np.concatenate([r["all_eprews"] for r in result]))  # episode total rewards
				eplens.extend(np.concatenate([r["all_eplens"] for r in result]))  # episode lengths

		# assemble gradients
		grads = grads_all[0]
		for i in range(1, len(grads_all)):
			for j in range(len(grads)):
				grads[j] += grads_all[i][j]

		# propagate network parameters to others
		params = pg_learners[pa.batch_size].get_params()

		rmsprop_updates_outside(grads, params, accums, pa.lr_rate, pa.rms_rho, pa.rms_eps)

		for i in range(pa.batch_size + 1):
			pg_learners[i].set_net_params(params)


		timer_end = time.time()
		print("-----------------")
		print("Iteration: \t %i" % iteration)
		print("NumTrajs: \t %i" % len(eprews))
		print("NumTimesteps: \t %i" % np.sum(eplens))
		print("Elapsed time\t %s" % (timer_end - timer_start), "seconds")
		print("-----------------")
		# time.sleep(5)
		pg_resume = '/home/dell/testing_part2/%s_10ex.pkl_' % str(iteration)
		if iteration % 10 == 0:
			param_file = open(pg_resume, 'wb')
			pickle.dump(pg_learners[pa.batch_size].get_params(), param_file, -1)
			param_file.close()

		if iteration % 20 == 0:
			logline = test(iteration, pa, pg_resume, workloads, pg_learners[pa.batch_size])
			loglines += logline
			logs.write(loglines)
			logs.flush()
			os.fsync(logs.fileno())
			loglines = ''
	logs.close()