# hyperparameters
num_state_features = env.observation_space.shape[0]
num_actions = env.action_space.n
learning_rate = 5e-4
num_iterations = 10000
memory_size = 10000
batch_size = 1024
update_target_net_every = 5

# hyperparameters for e-greedy policy
min_e = .01
max_e = .6
e_decay = 5000.0

# define brain (online network) and its target copy
brain = Agent(num_state_features, num_actions, memory_size).cuda()
brain_prime = Agent(num_state_features, num_actions, memory_size).cuda()
brain_prime.net.load_state_dict(brain.net.state_dict())
optimizer = torch.optim.RMSprop(brain.net.parameters(), lr=learning_rate)


def main():
    eps_step_tracker = 0
    loss_tracker = AverageMeter()

    for i in range(num_iterations):
        state = env.reset()
        ep_length = 0
        ep_reward = 0

        while True:
            # env.render()
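# The excerpt above defines min_e, max_e, and e_decay but does not show how the
# exploration rate is actually computed. Below is a minimal standalone sketch of
# a plausible exponential e-greedy schedule; the exact formula and the helper
# name get_epsilon are assumptions, not taken from the original script.
import math

def get_epsilon(step, min_e=0.01, max_e=0.6, e_decay=5000.0):
    # decay exploration from max_e toward min_e as the step counter grows
    return min_e + (max_e - min_e) * math.exp(-step / e_decay)

# example: get_epsilon(0) -> 0.6, get_epsilon(5000) -> ~0.23, get_epsilon(20000) -> ~0.02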
import sys
sys.path.append('../Prim-Agent')
import os

from environment import Environment
from network import Net, Agent
from utils import utils

sys.path.append('../demo')

os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# initialize
env = Environment()
agent = Agent('eval/prim.pth')

# save path
save_path = 'prim_result/'
utils.check_dirs([save_path])

shape_infopack = ['demo', 'eval/demo-16.binvox', 'eval/demo-64.binvox', 'rgb', 'demo.png']

s, box, step = env.reset(shape_infopack)
acm_r = 0
step_interval = 20

while True:
    valid_mask = env.get_valid_action_mask(box)
    a = agent.choose_action(s, box, step, valid_mask, 1.0)
    s_, box_, step_, r, done = env.next(a)
    return np.mean(all_reward)


if __name__ == "__main__":

    args = parse_args()

    shape_ref_type = args.reference
    shape_category = args.category

    shape_ref_path = args.data_root + 'shape_reference/' + shape_ref_type + '/' + shape_category + '/'
    shape_vox_path = args.data_root + 'shape_binvox/' + shape_category + '/'
    load_net_path = args.load_net + shape_ref_type + '/' + shape_category + '.pth'
    test_shapelist_path = args.data_root + 'shape_list/' + shape_category + '/' + shape_category + '-test.txt'
    save_result_path = args.save_result + shape_ref_type + '/' + shape_category + '/'
    utils.check_dirs([save_result_path])

    # GPU
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # shape list
    test_shapelist = utils.load_filelist(test_shapelist_path)

    # initialize
    env = Environment()
    agent = Agent(load_net_path)

    mean_reward = test(agent, env, test_shapelist)
    print(mean_reward)
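# The body of test() is not shown in this excerpt (only its final return appears
# above). The sketch below illustrates what such an evaluation loop could look
# like, reusing the Environment/Agent interface from the demo script (env.reset,
# env.next, env.get_valid_action_mask, agent.choose_action). The infopack layout
# and the greedy epsilon value of 1.0 are assumptions carried over from that
# script, not the project's confirmed test implementation.
def test(agent, env, shape_list):
    all_reward = []
    for shape_name in shape_list:
        # hypothetical infopack; real file naming may differ
        shape_infopack = [shape_name,
                          shape_vox_path + shape_name + '-16.binvox',
                          shape_vox_path + shape_name + '-64.binvox',
                          shape_ref_type,
                          shape_ref_path + shape_name + '.png']
        s, box, step = env.reset(shape_infopack)
        acm_r = 0
        while True:
            valid_mask = env.get_valid_action_mask(box)
            a = agent.choose_action(s, box, step, valid_mask, 1.0)
            s, box, step, r, done = env.next(a)
            acm_r += r
            if done:
                break
        all_reward.append(acm_r)
    return np.mean(all_reward)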
num_md = 15
num_task = 15
num_bs = 3
env = ENV(num_md, num_task, num_bs)

# parameters
# UEnet = Agent(alpha=0.0005, beta=0.005, input_dims=8, tau=0.01, \
#               env=None, batch_size=64, layer1_size=500, layer2_size=300,
#               n_actions=1)
MECSnet = Agent(alpha=0.0004, beta=0.004, input_dims=num_task * 3 + num_bs, tau=0.01,
                env=env, batch_size=64, layer1_size=500, layer2_size=300,
                n_actions=3)  # alpha beta tau batch_size

score_record = []
score_record_step = []
count_record = []
count_record_step = []
time_record = []
time_record_step = []

for i in range(800):
    done = False
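# In agents parameterized like this, tau typically controls a soft (Polyak)
# update of the target networks rather than a hard copy. The function below is
# a generic sketch of that rule for PyTorch-style modules; it is an assumption
# about how tau is used, not the Agent class's actual implementation, and the
# names soft_update/target_net/online_net are placeholders.
def soft_update(target_net, online_net, tau=0.01):
    # target <- tau * online + (1 - tau) * target, applied parameter-wise
    for t_param, o_param in zip(target_net.parameters(), online_net.parameters()):
        t_param.data.copy_(tau * o_param.data + (1.0 - tau) * t_param.data)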
save_net_path = args.save_net
save_log_path = args.save_log
save_tmp_result_path = args.save_tmp_result
utils.check_dirs([save_net_path, save_log_path, save_tmp_result_path])

# GPU
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

# shape list
IL_shape_list = utils.load_filelist(IL_shapelist_path)
RL_shape_list = utils.load_filelist(RL_shapelist_path)

# initialize
env = Environment()
agent = Agent()
writer = SummaryWriter(save_log_path)

# imitation learning, then save the resulting networks
imitation_learning(agent, env, writer, IL_shape_list)
torch.save(
    agent.eval_net.state_dict(),
    save_net_path + 'eval_IL_' + shape_ref_type + '_' + shape_category + '.pth')
torch.save(
    agent.target_net.state_dict(),
    save_net_path + 'target_IL_' + shape_ref_type + '_' + shape_category + '.pth')

# clear the replay memory before reinforcement learning
agent.memory_self.clear()
reinforcement_learning(agent, env, writer, RL_shape_list)
torch.save(
    agent.eval_net.state_dict(),
    save_net_path + 'eval_RL_' + shape_ref_type + '_' + shape_category + '.pth')
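# A hedged usage note: the checkpoints written above can later be restored into
# a fresh Agent for evaluation or further fine-tuning. This sketch uses standard
# PyTorch calls and the eval_net/target_net attributes shown above; whether the
# project restores weights this way (rather than via the Agent constructor) is
# an assumption.
agent_restored = Agent()
agent_restored.eval_net.load_state_dict(
    torch.load(save_net_path + 'eval_IL_' + shape_ref_type + '_' + shape_category + '.pth'))
agent_restored.target_net.load_state_dict(
    torch.load(save_net_path + 'target_IL_' + shape_ref_type + '_' + shape_category + '.pth'))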
gym.logger.set_level(gym.logger.ERROR)  # Disable Gym warnings

counter = Counter()
tf.set_random_seed(args.seed)
sess = tf.Session()

with tf.device('/cpu:0'):
    global_net = ActorCriticNet(args, 'global_net', sess)
    average_net = ActorCriticNet(args, 'average_net', sess)
    test_net = ActorCriticNet(args, 'test', sess)
    agents = []
    for i in range(args.num_agents):
        agent_name = 'Agent_%i' % i
        agents.append(Agent(args, i, agent_name, global_net, average_net, sess))

if args.output_graph:
    tf.summary.FileWriter("logs/", sess.graph)

coord = tf.train.Coordinator()
sess.run(tf.global_variables_initializer())

# initialize average network weights to global network weights
sess.run([
    tf.assign(a_p, g_p)
    for a_p, g_p in zip(average_net.a_params, global_net.a_params)
])


def job():
    agent.acer_main(counter, coord, average_net)
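# In ACER, the average policy network is usually maintained as an exponential
# moving average of the global network during training, not only initialized
# once as above. The TF1-style ops below sketch that update; the decay value and
# the op name are assumptions and not part of this script, which presumably
# performs the update elsewhere.
avg_decay = 0.99
soft_update_avg_net = [
    tf.assign(a_p, avg_decay * a_p + (1.0 - avg_decay) * g_p)
    for a_p, g_p in zip(average_net.a_params, global_net.a_params)
]
# a worker would then periodically run: sess.run(soft_update_avg_net)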
from tictactoe import TicTacToe
from network import Agent
from player import Player
import pickle


# player against ai
def play(player0, player1):
    game = TicTacToe(printing=True)
    game.print_board()

    # play game
    while game.winner == -1:
        if game.player == 0:
            winner = game.play(player0.get_input())
        else:
            winner = game.play(player1.get_input(board=game.board))

    return winner


if __name__ == '__main__':
    agent = input('Enter the name of the file to play:\n')
    agent = Agent.load(f'players/{agent}')

    player0 = Player()
    player1 = Player(agent=agent)

    replay = 'y'
    while replay == 'y':
        print(play(player0, player1), 'won the game.')
        replay = input('Play again?\ny/n\n')
# from utils import plotLearning

if __name__ == "__main__":
    # hyperparameters
    state_dim = 115
    # number of possible actions
    action_dim = 19
    # learning rates for actor and critic
    alpha = 0.0001
    beta = 0.0005
    gamma = .95
    # number of episodes to test
    n_episodes = 20

    # instantiate the agent
    agent = Agent(state_dim, action_dim, alpha, beta, gamma)
    # load the trained model
    agent.actor = torch.load('PPO_act6000.pth')

    display_train = False
    # render environment
    if display_train:
        env = football_env.create_environment(env_name='academy_empty_goal',
                                              representation='pixels', render=True)
    else:
        env = football_env.create_environment(env_name='academy_empty_goal',
                                              representation='simple115', render=True)

    # append the scores
    score_history = []

    # testing pipeline
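    # The testing pipeline itself is cut off in this excerpt. Below is a minimal
    # sketch of what it could look like; agent.choose_action is an assumed method
    # name, and the environment is assumed to follow the standard Gym step
    # interface (observation, reward, done, info) used by gfootball.
    for episode in range(n_episodes):
        observation = env.reset()
        done = False
        score = 0
        while not done:
            action = agent.choose_action(observation)
            observation, reward, done, info = env.step(action)
            score += reward
        score_history.append(score)
        print('episode', episode, 'score %.2f' % score)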