Code example #1
def drf(job_trace=None):
    if job_trace is None:
        job_trace = trace.Trace(None).get_trace()
    env = drf_env.DRF_Env("DRF", job_trace, None)
    while not env.end:
        env.step()
    return env.get_results()
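
For context, a minimal usage sketch of drf(), assuming trace.Trace(None).get_trace() produces a randomly generated job trace and env.get_results() returns a (num_jobs, jct, makespan, reward) tuple as in code example #3; run_drf_baseline is a hypothetical helper, not part of the original code.

def run_drf_baseline(num_traces=10):
    # run the DRF heuristic on several freshly generated traces (assumption:
    # drf() returns num_jobs, jct, makespan, reward, as in code example #3)
    jcts = []
    for _ in range(num_traces):
        num_jobs, jct, makespan, reward = drf()
        jcts.append(jct)
    # average job completion time over the generated traces
    return sum(jcts) / len(jcts)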
Code example #2
def val_loss(net, val_traces, logger, global_step):
    avg_loss = 0
    step = 0
    data = []
    for episode in range(len(val_traces)):
        job_trace = val_traces[episode]
        if pm.HEURISTIC == "DRF":
            env = drf_env.DRF_Env("DRF", job_trace, logger)
        elif pm.HEURISTIC == "FIFO":
            env = fifo_env.FIFO_Env("FIFO", job_trace, logger)
        elif pm.HEURISTIC == "SRTF":
            env = srtf_env.SRTF_Env("SRTF", job_trace, logger)
        elif pm.HEURISTIC == "Tetris":
            env = tetris_env.Tetris_Env("Tetris", job_trace, logger)
        else:
            raise ValueError("unknown heuristic: " + str(pm.HEURISTIC))
        ts = 0
        while not env.end:
            data += env.step()
            ts += 1
            if len(data) >= pm.MINI_BATCH_SIZE:
                # prepare a validation batch
                indexes = np.random.choice(len(data),
                                           size=pm.MINI_BATCH_SIZE,
                                           replace=False)
                inputs = []
                labels = []
                for index in indexes:
                    input, label = data[index]
                    inputs.append(input)
                    labels.append(label)
                # supervised learning forward pass to compute the loss on this batch
                output, loss = net.get_sl_loss(np.stack(inputs),
                                               np.vstack(labels))
                avg_loss += loss
                # if step%50 == 0:
                # 	# # type, # of time slots in the system so far, normalized remaining epoch, dom resource
                # 	tb_logger.add_text(tag="sl:input+label+output:" + str(episode) + "_" + str(ts), value="input:" + \
                # 		" type: "+ str(input[0]) + " stay_ts: " + str(input[1]) + " rt: " + str(input[2]) \
                # 		+ " resr:" + str(input[3]) + "\n" +
                # 		" label: " + str(label) + "\n" + " output: " + str(output[-1]), step=global_step)
                step += 1
                data = []

    # guard against the case where no full validation batch was formed
    return avg_loss / max(step, 1)
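
The definition of net.get_sl_loss() is not shown here; it is presumably a cross-entropy between the network's output action distribution and the heuristic-provided label. A minimal NumPy sketch of that assumption, with a hypothetical name (sl_cross_entropy), for reference:

import numpy as np

def sl_cross_entropy(outputs, labels, eps=1e-8):
    # outputs: (batch, num_actions) action probabilities from the policy network
    # labels:  (batch, num_actions) target distributions produced by the heuristic
    # hypothetical stand-in for what net.get_sl_loss() is assumed to compute
    return -np.mean(np.sum(labels * np.log(outputs + eps), axis=1))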
Code example #3
def sl_agent(net_weights_q, net_gradients_q, stats_q, id):
	logger = log.getLogger(name="agent_"+str(id), level=pm.LOG_MODE)
	logger.info("Start supervised learning, agent " + str(id) + " ...")

	if not pm.RANDOMNESS:
		np.random.seed(pm.np_seed+id+1)

	config = tf.ConfigProto()
	config.gpu_options.allow_growth = True
	with tf.Session(config=config) as sess, tf.device("/gpu:"+str(id%2)):
		policy_net = network.PolicyNetwork(sess, "policy_net", pm.TRAINING_MODE, logger)
		sess.run(tf.global_variables_initializer())  # to avoid batch normalization error

		global_step = 1
		avg_jct = []
		avg_makespan = []
		avg_reward = []
		if not pm.VAL_ON_MASTER:
			validation_traces = []  # validation traces
			for i in range(pm.VAL_DATASET):
				validation_traces.append(trace.Trace(None).get_trace())
		# generate training traces
		traces = []
		for episode in range(pm.TRAIN_EPOCH_SIZE):
			job_trace = trace.Trace(None).get_trace()
			traces.append(job_trace)
		mem_store = memory.Memory(maxlen=pm.REPLAY_MEMORY_SIZE)
		logger.info("Filling experience buffer...")
		for epoch in range(pm.TOT_TRAIN_EPOCHS):
			for episode in range(pm.TRAIN_EPOCH_SIZE):
				tic = time.time()
				job_trace = copy.deepcopy(traces[episode])
				if pm.HEURISTIC == "DRF":
					env = drf_env.DRF_Env("DRF", job_trace, logger)
				elif pm.HEURISTIC == "FIFO":
					env = fifo_env.FIFO_Env("FIFO", job_trace, logger)
				elif pm.HEURISTIC == "SRTF":
					env = srtf_env.SRTF_Env("SRTF", job_trace, logger)
				elif pm.HEURISTIC == "Tetris":
					env = tetris_env.Tetris_Env("Tetris", job_trace, logger)
				else:
					raise ValueError("unknown heuristic: " + str(pm.HEURISTIC))

				while not env.end:
					if pm.LOG_MODE == "DEBUG":
						time.sleep(0.01)
					data = env.step()
					logger.debug("ts length:" + str(len(data)))

					for (input, label) in data:
						mem_store.store(input, 0, label, 0)

					if mem_store.full():
						# prepare a training batch
						_, trajectories, _ = mem_store.sample(pm.MINI_BATCH_SIZE)
						input_batch = [traj.state for traj in trajectories]
						label_batch = [traj.action for traj in trajectories]

						# if global_step % 10 == 0:
						# 	print "input", input_batch[0]
						# 	print "label", label_batch[0]

						# pull latest weights before training
						weights = net_weights_q.get()
						if isinstance(weights, basestring) and weights == "exit":
							logger.info("Agent " + str(id) + " exits.")
							exit(0)
						policy_net.set_weights(weights)

						# supervised learning to compute gradients
						entropy, loss, policy_grads = policy_net.get_sl_gradients(np.stack(input_batch), np.vstack(label_batch))
						for grad in policy_grads:
							assert not np.any(np.isnan(grad))

						# send gradients to the central agent
						net_gradients_q.put(policy_grads)

						# validation
						if not pm.VAL_ON_MASTER and global_step % pm.VAL_INTERVAL == 0:
							val_tic = time.time()
							val_loss = validate.val_loss(policy_net, validation_traces, logger, global_step)
							jct, makespan, reward = validate.val_jmr(policy_net, validation_traces, logger, global_step)
							stats_q.put(("val", val_loss, jct, makespan, reward))
							val_toc = time.time()
							logger.info("Agent " + str(id) + " Validation at step " + str(global_step) + " Time: " + '%.3f'%(val_toc-val_tic))
						stats_q.put(("step:sl", entropy, loss))

						global_step += 1

				num_jobs, jct, makespan, reward = env.get_results()
				avg_jct.append(jct)
				avg_makespan.append(makespan)
				avg_reward.append(reward)
				if global_step % pm.DISP_INTERVAL == 0:
					logger.info("Agent\t AVG JCT\t Makespan\t Reward")
					logger.info(str(id) + " \t \t " + '%.3f' %(sum(avg_jct)/len(avg_jct)) + " \t\t" + " " + '%.3f' %(1.0*sum(avg_makespan)/len(avg_makespan)) \
								+ " \t" + " " + '%.3f' %(sum(avg_reward)/len(avg_reward)))