Example #1
	def __init__(self, id, algo_name, state_dim, action_dim, hidden_size, actor_lr, critic_lr, gamma, tau, savetag, foldername, actualize, use_gpu, num_agents, init_w = True):

		self.algo_name = algo_name; self.gamma = gamma; self.tau = tau; self.total_update = 0; self.agent_id = id; self.use_gpu = use_gpu
		self.tracker = utils.Tracker(foldername, ['q_'+savetag, 'qloss_'+savetag, 'policy_loss_'+savetag], '.csv', save_iteration=1000, conv_size=1000)
		self.num_agents = num_agents

		#Initialize actors
		self.policy = MultiHeadActor(state_dim, action_dim, hidden_size, num_agents)
		if init_w: self.policy.apply(utils.init_weights)
		self.policy_target = MultiHeadActor(state_dim, action_dim, hidden_size, num_agents)
		utils.hard_update(self.policy_target, self.policy)
		self.policy_optim = Adam(self.policy.parameters(), actor_lr)


		self.critics = [QNetwork(state_dim*num_agents, action_dim*num_agents, hidden_size*2) for _ in range(num_agents)]
		self.critics_target = [QNetwork(state_dim*num_agents, action_dim*num_agents, hidden_size*2) for _ in range(num_agents)]
		if init_w:
			for critic, critic_target in zip(self.critics, self.critics_target):
				critic.apply(utils.init_weights)
				utils.hard_update(critic_target, critic)
		self.critic_optims = [Adam(critic.parameters(), critic_lr) for critic in self.critics]


		self.loss = nn.MSELoss()

		if use_gpu:
			self.policy_target.cuda(); self.policy.cuda()
			for critic, critic_target in zip(self.critics, self.critics_target):
				critic.cuda()
				critic_target.cuda()


		self.num_critic_updates = 0

		#Statistics Tracker
		#self.action_loss = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.policy_loss = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.q_loss = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.q = {'min':None, 'max': None, 'mean':None, 'std':None}
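The constructor above copies the freshly initialized policy and critics into their target networks with utils.hard_update and keeps tau around for later target updates. A minimal sketch of what such helpers usually look like in PyTorch (the bodies below are assumptions, not this repo's actual utils code):

    import torch.nn as nn

    def hard_update(target, source):
        # Overwrite every target parameter with the corresponding source parameter.
        for t, s in zip(target.parameters(), source.parameters()):
            t.data.copy_(s.data)

    def soft_update(target, source, tau):
        # Polyak averaging: target <- (1 - tau) * target + tau * source.
        for t, s in zip(target.parameters(), source.parameters()):
            t.data.copy_(t.data * (1.0 - tau) + s.data * tau)

    # Example: keep a target network slowly tracking the online network.
    net, tgt = nn.Linear(4, 2), nn.Linear(4, 2)
    hard_update(tgt, net)            # exact copy at initialization
    soft_update(tgt, net, tau=0.01)  # slow tracking during training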
Example #2
	def __init__(self, id, algo_name, state_dim, action_dim, hidden_size, actor_lr, critic_lr, gamma, tau, savetag, foldername, actualize, use_gpu, num_agents, init_w = True):

		self.algo_name = algo_name; self.gamma = gamma; self.tau = tau; self.total_update = 0; self.agent_id = id; self.actualize = actualize; self.use_gpu = use_gpu
		self.tracker = utils.Tracker(foldername, ['q_'+savetag, 'qloss_'+savetag, 'policy_loss_'+savetag, 'alz_score'+savetag,'alz_policy'+savetag], '.csv', save_iteration=1000, conv_size=1000)

		#Initialize actors
		self.policy = MultiHeadActor(state_dim, action_dim, hidden_size, num_agents)
		if init_w: self.policy.apply(utils.init_weights)
		self.policy_target = MultiHeadActor(state_dim, action_dim, hidden_size, num_agents)
		utils.hard_update(self.policy_target, self.policy)
		self.policy_optim = Adam(self.policy.parameters(), actor_lr)


		self.critic = QNetwork(state_dim, action_dim, hidden_size)
		if init_w: self.critic.apply(utils.init_weights)
		self.critic_target = QNetwork(state_dim, action_dim, hidden_size)
		utils.hard_update(self.critic_target, self.critic)
		self.critic_optim = Adam(self.critic.parameters(), critic_lr)

		if actualize:
			self.ANetwork = ActualizationNetwork(state_dim, action_dim, hidden_size)
			if init_w: self.ANetwork.apply(utils.init_weights)
			self.actualize_optim = Adam(self.ANetwork.parameters(), critic_lr)
			self.actualize_lr = 0.2
			if use_gpu: self.ANetwork.cuda()

		self.loss = nn.MSELoss()

		if use_gpu:
			self.policy_target.cuda(); self.critic_target.cuda(); self.policy.cuda(); self.critic.cuda()
		self.num_critic_updates = 0

		#Statistics Tracker
		#self.action_loss = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.policy_loss = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.q_loss = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.q = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.alz_score = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.alz_policy = {'min':None, 'max': None, 'mean':None, 'std':None}
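The min/max/mean/std dictionaries initialized above are presumably refreshed from batches of Q-values and losses during training. A small helper in the same spirit (compute_stats is an illustrative name; the real utils function may differ):

    import torch

    def compute_stats(tensor, tracker):
        # Fill the min/max/mean/std slots of a tracker dict in place.
        tracker['min'] = torch.min(tensor).item()
        tracker['max'] = torch.max(tensor).item()
        tracker['mean'] = torch.mean(tensor).item()
        tracker['std'] = torch.std(tensor).item()

    # e.g. compute_stats(q_batch, self.q) after each critic update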
Example #3
	def __init__(self, args, id):
		self.args = args
		self.id = id

		###Initialize neuroevolution module###
		self.evolver = SSNE(self.args)

		########Initialize population
		self.manager = Manager()
		self.popn = self.manager.list()
		for _ in range(args.popn_size):
			self.popn.append(MultiHeadActor(args.state_dim, args.action_dim, args.hidden_size, args.config.num_agents))
			self.popn[-1].eval()

		#### INITIALIZE PG ALGO #####
		if self.args.is_matd3 or args.is_maddpg:
			algo_name = 'TD3' if self.args.is_matd3 else 'DDPG'
			self.algo = MATD3(id, algo_name, args.state_dim, args.action_dim, args.hidden_size, args.actor_lr,
			                args.critic_lr, args.gamma, args.tau, args.savetag, args.aux_save, args.actualize,
			                args.use_gpu, args.config.num_agents, args.init_w)

		else:
			self.algo = MultiTD3(id, 'TD3', args.state_dim, args.action_dim, args.hidden_size, args.actor_lr,
			                args.critic_lr, args.gamma, args.tau, args.savetag, args.aux_save, args.actualize,
			                args.use_gpu, args.config.num_agents, args.init_w)


		#### Rollout Actor is a template used for MP #####
		self.rollout_actor = self.manager.list()
		self.rollout_actor.append(MultiHeadActor(args.state_dim, args.action_dim, args.hidden_size, args.config.num_agents))

		#Initialize buffer
		self.buffer = [Buffer(args.buffer_size, buffer_gpu=False) for _ in range(args.config.num_agents)]

		#Agent metrics
		self.fitnesses = [[] for _ in range(args.popn_size)]

		###Best Policy HOF####
		self.champ_ind = 0
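The population and the rollout template above are wrapped in a multiprocessing.Manager().list() so that separate rollout processes can read them through a shared proxy. A self-contained sketch of that pattern (the worker body is illustrative; the real code runs full episodes):

    from multiprocessing import Manager, Process

    def rollout_worker(shared_popn, index):
        # In the full code this would run an episode with shared_popn[index].
        print('worker sees:', shared_popn[index])

    if __name__ == '__main__':
        manager = Manager()
        popn = manager.list()
        popn.append('policy-0')  # stands in for a MultiHeadActor template
        p = Process(target=rollout_worker, args=(popn, 0))
        p.start()
        p.join()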
Example #4


if __name__ == "__main__":
	args = Parameters()  # Create the Parameters class
	train_env = RoverDomainPython(args, 10)
	test_env = RoverDomainPython(args, 100)


	#test_tracker = utils.Tracker(args.metric_save, [args.log_fname], '.csv')  # Initiate tracker
	torch.manual_seed(args.seed)
	np.random.seed(args.seed)
	random.seed(args.seed)  # Seeds

	total_frames = 0; all_scores = [-1.0]; all_test = [-1.0]
	model = MultiHeadActor(args.state_dim, args.action_dim, args.hidden_size, args.config.num_agents)


	print_threshold = 1000000
	###### TRAINING LOOP ########
	while True:

		if args.dist == 'uniform':
			model.apply(sample_weight_uniform)
		elif args.dist == 'normal':
			model.apply(sample_weight_normal)
		else:
			raise Exception('Unknown distribution')

		score, frame = evaluate(train_env, model, 10)
		total_frames += frame
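evaluate() is called on a model whose weights are resampled from scratch every iteration via model.apply(...). The samplers themselves are not shown in this snippet; a plausible sketch, assuming they redraw each Linear layer's parameters from the named distribution (the exact ranges are assumptions):

    import torch.nn as nn

    def sample_weight_uniform(m):
        # Resample the weights of every Linear layer from a uniform distribution.
        if isinstance(m, nn.Linear):
            nn.init.uniform_(m.weight, -1.0, 1.0)
            nn.init.uniform_(m.bias, -1.0, 1.0)

    def sample_weight_normal(m):
        # Resample the weights of every Linear layer from a standard normal.
        if isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, mean=0.0, std=1.0)
            nn.init.normal_(m.bias, mean=0.0, std=1.0)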
Example #5
    def __init__(self, args, id):
        self.args = args
        self.id = id

        ###Initialize neuroevolution module###
        self.evolver = SSNE(self.args)

        ########Initialize population
        self.manager = Manager()
        self.popn = self.manager.list()
        for _ in range(args.popn_size):
            if args.ps == 'trunk':
                self.popn.append(
                    MultiHeadActor(args.state_dim, args.action_dim,
                                   args.hidden_size, args.config.num_agents))

            else:
                if args.algo_name == 'TD3':
                    self.popn.append(
                        Actor(args.state_dim,
                              args.action_dim,
                              args.hidden_size,
                              policy_type='DeterministicPolicy'))
                else:
                    self.popn.append(
                        Actor(args.state_dim,
                              args.action_dim,
                              args.hidden_size,
                              policy_type='GaussianPolicy'))
            self.popn[-1].eval()

        #### INITIALIZE PG ALGO #####
        if args.ps == 'trunk':

            if self.args.is_matd3 or args.is_maddpg:
                algo_name = 'TD3' if self.args.is_matd3 else 'DDPG'
                self.algo = MATD3(id, algo_name, args.state_dim,
                                  args.action_dim, args.hidden_size,
                                  args.actor_lr, args.critic_lr, args.gamma,
                                  args.tau, args.savetag, args.aux_save,
                                  args.actualize, args.use_gpu,
                                  args.config.num_agents, args.init_w)

            else:
                self.algo = MultiTD3(id, args.algo_name, args.state_dim,
                                     args.action_dim, args.hidden_size,
                                     args.actor_lr, args.critic_lr, args.gamma,
                                     args.tau, args.savetag, args.aux_save,
                                     args.actualize, args.use_gpu,
                                     args.config.num_agents, args.init_w)

        else:
            if args.algo_name == 'TD3':
                self.algo = TD3(id, args.algo_name, args.state_dim,
                                args.action_dim, args.hidden_size,
                                args.actor_lr, args.critic_lr, args.gamma,
                                args.tau, args.savetag, args.aux_save,
                                args.actualize, args.use_gpu, args.init_w)
            else:
                self.algo = SAC(id, args.state_dim, args.action_dim,
                                args.hidden_size, args.gamma, args.critic_lr,
                                args.actor_lr, args.tau, args.alpha,
                                args.target_update_interval, args.savetag,
                                args.aux_save, args.actualize, args.use_gpu)

        #### Rollout Actor is a template used for MP #####
        self.rollout_actor = self.manager.list()

        if args.ps == 'trunk':
            self.rollout_actor.append(
                MultiHeadActor(args.state_dim, args.action_dim,
                               args.hidden_size, args.config.num_agents))
        else:
            if args.algo_name == 'TD3':
                self.rollout_actor.append(
                    Actor(args.state_dim,
                          args.action_dim,
                          args.hidden_size,
                          policy_type='DeterministicPolicy'))
            else:
                self.rollout_actor.append(
                    Actor(args.state_dim,
                          args.action_dim,
                          args.hidden_size,
                          policy_type='GaussianPolicy'))

        #Initialize buffer
        if args.ps == 'trunk':
            self.buffer = [
                Buffer(args.buffer_size,
                       buffer_gpu=False,
                       filter_c=args.filter_c)
                for _ in range(args.config.num_agents)
            ]
        else:
            self.buffer = Buffer(args.buffer_size,
                                 buffer_gpu=False,
                                 filter_c=args.filter_c)

        #Agent metrics
        self.fitnesses = [[] for _ in range(args.popn_size)]

        ###Best Policy HOF####
        self.champ_ind = 0
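Both branches above end with one or more Buffer objects: one per agent in the 'trunk' (shared-policy) case, or a single buffer otherwise. A minimal replay-buffer sketch with the same constructor shape (buffer_size, buffer_gpu and filter_c come from the calls above; the internals are assumptions):

    import random

    class Buffer:
        def __init__(self, capacity, buffer_gpu=False, filter_c=None):
            self.capacity = capacity
            self.buffer_gpu = buffer_gpu
            self.filter_c = filter_c
            self.data = []

        def add(self, transition):
            # Append and evict the oldest transition once capacity is reached.
            self.data.append(transition)
            if len(self.data) > self.capacity:
                self.data.pop(0)

        def sample(self, batch_size):
            return random.sample(self.data, min(batch_size, len(self.data)))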
Example #6
    def __init__(self, args, id):
        self.args = args
        self.id = id

        #### Rollout Actor is a template used for MP #####
        self.manager = Manager()
        self.rollout_actor = self.manager.list()

        for agent_id in range(args.config.num_agents):

            if (self.args.EVALUATE):  # LOAD model

                #filename = self.args.model_directory + str(
                #	agent_id) + "_actor_pop10_roll50_envrover_heterogeneous_fire_truck_uav_long_range_lidar_action_" + str(self.args.action_space)+ "_seed" + str(self.args.seed)+"-rewardglobal"

                filename = self.args.model_directory + str(
                    agent_id
                ) + "_actor_pop10_roll50_envrover_heterogeneous_fire_truck_uav_long_range_lidar_action_" + str(
                    self.args.action_space) + "_seed" + str(
                        self.args.seed) + "-rewardglobal_pg"

                m = torch.load(filename)
                temp_model = Actor(args.state_dim,
                                   args.action_dim,
                                   args.hidden_size,
                                   policy_type='DeterministicPolicy')

                temp_model.load_state_dict(m)

                if args.ps == 'trunk':
                    self.rollout_actor.append(
                        MultiHeadActor(args.state_dim, args.action_dim,
                                       args.hidden_size,
                                       args.config.num_agents))
                else:
                    if args.algo_name == 'TD3':
                        #self.rollout_actor.append(Actor(args.state_dim, args.action_dim, args.hidden_size, policy_type='DeterministicPolicy').load_state_dict(torch.load(filename)))
                        self.rollout_actor.append(temp_model)
                    else:
                        self.rollout_actor.append(
                            Actor(args.state_dim,
                                  args.action_dim,
                                  args.hidden_size,
                                  policy_type='GaussianPolicy'))
            else:

                if args.ps == 'trunk':
                    self.rollout_actor.append(
                        MultiHeadActor(args.state_dim, args.action_dim,
                                       args.hidden_size,
                                       args.config.num_agents))
                else:
                    if args.algo_name == 'TD3':
                        self.rollout_actor.append(
                            Actor(args.state_dim,
                                  args.action_dim,
                                  args.hidden_size,
                                  policy_type='DeterministicPolicy'))
                    else:
                        self.rollout_actor.append(
                            Actor(args.state_dim,
                                  args.action_dim,
                                  args.hidden_size,
                                  policy_type='GaussianPolicy'))

            if self.args.ps == 'full' or self.args.ps == 'trunk':
                break  #Only need one for homogeneous workloads
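The EVALUATE branch restores a previously trained actor with torch.load(filename) followed by load_state_dict. The matching save side is not shown; a hedged sketch of how such a checkpoint could be written (save_actor and the filename layout are illustrative, not this repo's exact convention):

    import torch

    def save_actor(actor, directory, agent_id, tag):
        # Persist only the state_dict so it can later be restored with
        # torch.load(path) + actor.load_state_dict(...), as in the snippet above.
        path = directory + str(agent_id) + '_actor_' + tag
        torch.save(actor.state_dict(), path)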