def create_network(num_layers=2, hidden_size=25):
    # Build a value network from the globally configured input/output sizes
    # and move its parameters into shared memory for the worker processes.
    network = ValueNetwork(input_d=input_d, output_d=actions_n,
                           num_layers=num_layers, hidden_size=hidden_size)
    network.share_memory()
    return network
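# ValueNetwork is not defined in these excerpts, and its constructor
# signature varies from snippet to snippet. A minimal sketch matching the
# keyword form used in create_network above, assuming a plain fully
# connected Q-network with one output per action:
import torch.nn as nn


class ValueNetwork(nn.Module):
    def __init__(self, input_d, output_d, num_layers=2, hidden_size=25):
        super().__init__()
        layers = []
        in_size = input_d
        for _ in range(num_layers):
            layers += [nn.Linear(in_size, hidden_size), nn.ReLU()]
            in_size = hidden_size
        layers.append(nn.Linear(in_size, output_d))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        # One Q-value per action.
        return self.model(x)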
def load(weight_name, memory_name, inp_size, hidden, possible_action_function,
         optimizer, eps=0.3, reg_coef=0.01):
    # Restore a trained agent: rebuild the network, load its weights, and
    # unpickle the replay memory saved alongside them.
    net = ValueNetwork(inp_size, hidden, reg_coef=reg_coef)
    net.load_weights(weight_name)
    with open(memory_name + '.pickle', 'rb') as file:
        memory = pickle.load(file)
    return ValueAgent(net, possible_action_function, optimizer,
                      memory=memory, eps=eps)
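# load() above implies a matching save step that is not shown. A
# hypothetical counterpart, assuming the agent exposes .net and .memory
# attributes and that load_weights wraps torch.load of a state dict; only
# the '.pickle' suffix is taken from load() itself:
import pickle

import torch


def save(agent, weight_name, memory_name):
    # Persist the weights and pickle the replay memory so the pair can be
    # restored by load() above.
    torch.save(agent.net.state_dict(), weight_name)
    with open(memory_name + '.pickle', 'wb') as file:
        pickle.dump(agent.memory, file)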
# These might include important parameters for your experiment,
# your models, torch's multiprocessing methods, etc.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', type=int, default=2648, help="Base server port")
    parser.add_argument('--seed', type=int, default=2207,
                        help="Python randomization seed; uses python default if 0 or not given")
    parser.add_argument('--num_processes', type=int, default=4)
    parser.add_argument('--num_episodes', type=int, default=20000)
    parser.add_argument('--name', type=str, default="",
                        help="name to be used for experiment logs")
    args = parser.parse_args()

    # make the shared networks
    value_network = ValueNetwork()
    target_network = ValueNetwork()
    value_network.share_memory()
    target_network.share_memory()

    # Example on how to initialize global locks for processes
    # and counters.
    counter = mp.Value('i', 0)
    lock = mp.Lock()
    processes = []

    # how to asynchronously call multiple instances of train
    # (loop body completed with the spawn pattern used by the other
    # launchers in this file; the exact argument tuple may differ)
    for idx in range(0, args.num_processes):
        p = mp.Process(target=train,
                       args=(idx, args, value_network, target_network,
                             lock, counter))
        p.start()
        processes.append(p)
# Use this script to handle arguments and
# initialize important components of your experiment.
# These might include important parameters for your experiment, and initialization of
# your models, torch's multiprocessing methods, etc.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_processes', type=int, default=8)
    parser.add_argument('--max_timestep', type=int, default=500)
    parser.add_argument('--iterate_target', type=int, default=500)
    parser.add_argument('--iterate_async', type=int, default=20)
    parser.add_argument('--discountFactor', type=float, default=0.99)
    parser.add_argument('--epsilon', type=float, default=1.)
    args = parser.parse_args()

    value_network = ValueNetwork(15, [15, 15], 4)
    saveModelNetwork(value_network, 'params/params_last')
    target_value_network = ValueNetwork(15, [15, 15], 4)

    optimizer = SharedAdam(value_network.parameters())
    optimizer.share_memory()

    # Example on how to initialize global locks for processes
    # and counters.
    counter = mp.Value('i', 0)
    lock = mp.Lock()
    processes = []

    # Example code to initialize torch multiprocessing.
    # (loop body completed with the argument tuple used verbatim by the
    # other launchers in this file)
    for idx in range(0, args.num_processes):
        trainingArgs = (idx, args, value_network, target_value_network,
                        optimizer, lock, counter)
        p = mp.Process(target=train, args=trainingArgs)
        p.start()
        processes.append(p)
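# SharedAdam is used by every launcher here but never shown. Below is a
# minimal sketch in the spirit of the widely used A3C implementations,
# under the assumption that it is an Adam whose moment buffers are
# allocated eagerly and moved into shared memory, so all worker processes
# update the same optimizer state. The real class may differ in details.
import math

import torch


class SharedAdam(torch.optim.Adam):
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=0):
        super().__init__(params, lr=lr, betas=betas, eps=eps,
                         weight_decay=weight_decay)
        # Adam normally allocates state lazily on the first step; create
        # it up front so it exists before share_memory() is called.
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = torch.zeros(1)
                state['exp_avg'] = torch.zeros_like(p.data)
                state['exp_avg_sq'] = torch.zeros_like(p.data)

    def share_memory(self):
        # Move the optimizer state into shared memory.
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'].share_memory_()
                state['exp_avg'].share_memory_()
                state['exp_avg_sq'].share_memory_()

    def step(self, closure=None):
        # Standard Adam update, written out so it works with the shared
        # tensor-valued step counter.
        loss = closure() if closure is not None else None
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                state = self.state[p]
                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']
                state['step'] += 1
                if group['weight_decay'] != 0:
                    grad = grad.add(p.data, alpha=group['weight_decay'])
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                denom = exp_avg_sq.sqrt().add_(group['eps'])
                step = state['step'].item()
                bias_correction1 = 1 - beta1 ** step
                bias_correction2 = 1 - beta2 ** step
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
                p.data.addcdiv_(exp_avg, denom, value=-step_size)
        return loss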
# Use this script to handle arguments and
# initialize important components of your experiment.
# These might include important parameters for your experiment,
# your models, torch's multiprocessing methods, etc.
if __name__ == "__main__":
    # Example on how to initialize global locks for processes
    # and counters.
    counter = mp.Value('i', 0)
    lock = mp.Lock()
    args = parser.parse_args()
    torch.manual_seed(args.seed)

    value_network, target_value_network = ValueNetwork(), ValueNetwork()
    processes = []
    value_network.share_memory()
    target_value_network.share_memory()

    for idx in range(0, args.numprocesses):
        lr = args.lr[idx]
        # SharedAdam arguments: (params, lr, betas, eps, weight_decay)
        optimizer = SharedAdam(value_network.parameters(), lr=lr)
        trainingArgs = (idx, args, value_network, target_value_network,
                        optimizer, lock, counter)
        p = mp.Process(target=train, args=trainingArgs)
        p.start()
        processes.append(p)
        args.port += 10
# p = mp.Process(target=train, args=())
# p.start()
# processes.append(p)
# for p in processes:
#     p.join()

args = parser.parse_args()
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print("Using", device)
torch.manual_seed(args.seed)
# 'spawn' is required to share CUDA tensors between processes
mp.set_start_method('spawn')

value_network = ValueNetwork().to(device)
value_network.share_memory()
optimizer = SharedAdam(value_network.parameters(), lr=1e-4)

counter = mp.Value('i', 0)
lock = mp.Lock()
I_tar = 10
I_async = 5
processes = []
name = ""  # str(datetime.now()) + "_"

for idx in range(0, args.num_processes):
    # Each worker gets its own target network initialised from the shared
    # value network. (Loop body completed with the spawn pattern used by
    # the other launchers in this file; the exact argument tuple may differ.)
    target_value_network = ValueNetwork().to(device)
    target_value_network.load_state_dict(value_network.state_dict())
    p = mp.Process(target=train,
                   args=(idx, args, value_network, target_value_network,
                         optimizer, lock, counter))
    p.start()
    processes.append(p)
# Example on how to initialize global locks for processes
# and counters.
numOpponents = 1
args = parser.parse_args()

results_dir = './Network_parameters'
if not os.path.exists(results_dir):
    os.makedirs(results_dir)

counter = mp.Value('i', 0)
lock = mp.Lock()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

val_network = ValueNetwork()
val_network.share_memory()
target_value_network = ValueNetwork()
target_value_network.share_memory()
hard_copy(val_network, target_value_network)

# Shared optimizer -> share the optimizer state between the processes.
# Adam allocates its state lazily, so the gradients are not shared here.
optimizer = SharedAdam(params=val_network.parameters(), lr=args.lr)
optimizer.share_memory()
optimizer.zero_grad()

timesteps_per_process = 32 * (10**6) // args.num_processes
processes = []
# (loop body completed with the argument tuple used verbatim by the
# other launchers in this file)
for idx in range(0, args.num_processes):
    trainingArgs = (idx, args, val_network, target_value_network,
                    optimizer, lock, counter)
    p = mp.Process(target=train, args=trainingArgs)
    p.start()
    processes.append(p)
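# hard_copy is called above but not defined in these excerpts. A plausible
# sketch, assuming it performs the usual hard target update (argument
# order taken from the call above):
def hard_copy(source_network, target_network):
    # Overwrite every target parameter with the corresponding source value.
    target_network.load_state_dict(source_network.state_dict())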
parser.add_argument('--learning_rate',  # flag name recovered from args.learning_rate below
                    type=float, default=1e-4, help='optimizer learning rate')
# parser.add_argument('--max_grads', type=float, default=1.0, help='max_grads')
parser.add_argument('--gamma', type=float, default=0.999, help='gamma')
parser.add_argument('--copy_freq', type=int, default=10000, help='copy_freq')
parser.add_argument('--aSyncFreq', type=int, default=10, help='aSyncFreq')
parser.add_argument('--numEpisodes', type=int, default=8000, help='Number of episodes')

if __name__ == "__main__":
    args = parser.parse_args()

    value_network = ValueNetwork(15, [60, 60, 30], 4)
    value_network.share_memory()
    target_value_network = ValueNetwork(15, [60, 60, 30], 4)
    target_value_network.share_memory()

    print('lr', args.learning_rate)
    optimizer = SharedAdam(params=value_network.parameters(), lr=args.learning_rate)
    optimizer.share_memory()

    counter = mp.Value('i', 0)
    lock = mp.Lock()
    processes = []
    # (loop body completed with the argument tuple used verbatim by the
    # other launchers in this file)
    for idx in range(0, args.num_processes):
        trainingArgs = (idx, args, value_network, target_value_network,
                        optimizer, lock, counter)
        p = mp.Process(target=train, args=trainingArgs)
        p.start()
        processes.append(p)
if __name__ == "__main__":
    # Example on how to initialize global locks for processes
    # and counters.
    mp.set_start_method('spawn')
    args = get_args()
    if not os.path.exists(args.log_dir):
        os.mkdir(args.log_dir)

    if args.mode == 'train':
        num_processes = args.n_jobs
        counter = mp.Value('i', 0)
        lock = mp.Lock()

        valueNetwork = ValueNetwork()
        targetNetwork = ValueNetwork()
        if args.use_gpu:
            targetNetwork = targetNetwork.cuda()
            valueNetwork = valueNetwork.cuda()
        targetNetwork.load_state_dict(valueNetwork.state_dict())
        targetNetwork.eval()
        valueNetwork.share_memory()
        targetNetwork.share_memory()

        # create Shared Adam optimizer
        optimizer = SharedAdam(valueNetwork.parameters())
        optimizer.share_memory()
        total_steps += max_episode_length
        steps_per_goal = total_steps / num_episodes
        if (episode % 10) == 0:
            print('Episode %d\tReal goals: %d/%d\tSteps: %d\tSteps per episode: %.1f'
                  % (episode, total_goals, num_episodes, total_steps, steps_per_goal))
            f.write('Real goals: %d/%d\tSteps: %d\tSteps per episode: %.1f\n'
                    % (total_goals, num_episodes, total_steps, steps_per_goal))
            f.flush()
    f.close()

value_network = ValueNetwork()
idx = 12
port = 8000 + idx * 20
seed = idx * 2
directories = [x[0] for x in os.walk('.') if 'results' in x[0]]

print('starting HFO')
hfoEnv = HFOEnv(numTeammates=0, numOpponents=1, port=port, seed=seed)
hfoEnv.connectToServer()

for i in range(4, 8):
    fn = 'params_' + str(i)
    print('Loading from ', fn)
    value_network.load_state_dict(torch.load(fn))
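# The params_* files loaded above are produced by saveModelNetwork, which
# is used in several snippets but never shown. A minimal sketch consistent
# with the value_network.load_state_dict(torch.load(fn)) calls, assuming
# it serialises only the state dict:
import torch


def saveModelNetwork(model, save_path):
    torch.save(model.state_dict(), save_path)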
def train(idx, args, learning_network, target_network, optimizer, lock, counter):
    # Derive the server port and random seed for this worker from its id.
    port = 8100 + 10 * idx
    seed = idx * 113 + 923
    torch.manual_seed(seed)

    # Local worker copy of the shared network; gradients are computed on
    # this copy and then synchronised back to learning_network.
    worker_network = ValueNetwork(15, 4)
    worker_network.load_state_dict(learning_network.state_dict())

    # init env
    hfo_env = HFOEnv(numTeammates=0, numOpponents=1, port=port, seed=seed)
    hfo_env.connectToServer()

    episode_num = 0
    eps = random.sample(epsilon_list, 1)[0]
    worker_timestep = 0
    mse_loss = nn.MSELoss()
    max_worker_steps = args.max_steps / args.num_processes
    can_continue = True
    goal = 0
    to_goal = []

    while can_continue:
        # run one episode
        obs_tensor = hfo_env.reset()
        done = False
        loss = 0
        reward_ep = 0
        ep_steps = 0
        upd_steps = 0
        while not done:
            # select an action with the epsilon-greedy policy
            action_idx = select_action(obs_tensor, worker_network, worker_timestep,
                                       max_worker_steps, args, eps)
            action = hfo_env.possibleActions[action_idx]
            # observe the next state and reward
            next_obs_tensor, reward, done, status, info = hfo_env.step(action)
            y = computeTargets(reward, next_obs_tensor, args.discount, done,
                               target_network)
            # prediction for the chosen action using the worker's parameters
            q_pred = computePrediction(obs_tensor, action_idx, worker_network)
            # move to the new state
            obs_tensor = next_obs_tensor

            # update episode stats
            loss += mse_loss(y, q_pred)
            reward_ep += reward
            upd_steps += 1
            ep_steps += 1
            worker_timestep += 1
            if status == 1:
                goal += 1
                to_goal.append(ep_steps)

            with lock:
                counter.value += 1
                if counter.value % args.checkpoint_time == 0:
                    saveModelNetwork(
                        learning_network,
                        args.checkpoint_dir + '_{}'.format(counter.value))

            # if terminal or time to update the shared network
            if done or worker_timestep % args.val_net_update_freq == 0:
                worker_network.zero_grad()
                optimizer.zero_grad()
                # take the mean loss over the accumulated steps
                loss /= upd_steps
                loss.backward()
                sync_grad(learning_network, worker_network)
                optimizer.step()
                worker_network.load_state_dict(learning_network.state_dict())
                loss = 0
                upd_steps = 0

            # perform a hard update of the target network
            if counter.value % args.tgt_net_update_freq == 0:
                target_network.load_state_dict(learning_network.state_dict())
            if counter.value % args.checkpoint_time == 0:
                saveModelNetwork(
                    learning_network,
                    args.checkpoint_dir + '_{}'.format(counter.value))

        episode_num += 1
        # stop once any budget is exhausted or the server goes down
        can_continue = (counter.value <= args.max_steps
                        and worker_timestep <= max_worker_steps
                        and status != SERVER_DOWN
                        and episode_num <= args.num_episodes)  # run just 8K episodes

    # finish the game
    hfo_env.quitGame()
    # save the network the worker stopped with
    saveModelNetwork(learning_network,
                     args.checkpoint_dir + '_{}_final'.format(counter.value))
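# train() above calls several helpers that are not defined in these
# excerpts. The sketches below are assumptions reconstructed from the call
# sites, not the original implementations: sync_grad copies the worker's
# gradients onto the shared network (the usual asynchronous-update trick),
# computeTargets/computePrediction implement the one-step Q-learning
# target and prediction, and select_action is epsilon-greedy (any
# annealing the real code does with its timestep arguments is omitted).
import random

import torch


def sync_grad(shared_network, worker_network):
    # Point each shared parameter's gradient at the worker's gradient so
    # the shared optimizer step applies the worker's update.
    for shared_p, worker_p in zip(shared_network.parameters(),
                                  worker_network.parameters()):
        if worker_p.grad is not None:
            shared_p._grad = worker_p.grad


def computeTargets(reward, next_obs, discount, done, target_network):
    # One-step Q-learning target: r + gamma * max_a Q_target(s', a),
    # dropping the bootstrap term on terminal transitions.
    with torch.no_grad():
        if done:
            return torch.tensor(float(reward))
        return reward + discount * target_network(next_obs).max()


def computePrediction(obs, action_idx, value_network):
    # Q-value of the chosen action under the worker's current parameters.
    return value_network(obs).flatten()[action_idx]


def select_action(obs, network, worker_timestep, max_worker_steps, args, eps):
    # Epsilon-greedy action selection over four discrete actions.
    if random.random() < eps:
        return random.randrange(4)
    with torch.no_grad():
        return int(network(obs).argmax().item())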
                    help='how many batches to wait before logging training status')
parser.add_argument('--num-processes', type=int, default=8)
parser.add_argument('--discountFactor', type=float, default=0.9)
parser.add_argument('--target-interval', type=int, default=2000)
parser.add_argument('--predict-interval', type=int, default=250)
parser.add_argument('--timesteps', type=int, default=10000000)
parser.add_argument('--tmax', type=float, default=32e6)
parser.add_argument('--epsilon', type=float, default=0.9)
parser.add_argument('--save-interval', type=int, default=100000)

args = parser.parse_args()
mp.set_start_method('spawn')

value_network = ValueNetwork(68, [15, 15], 4)
target_value_network = ValueNetwork(68, [15, 15], 4)
target_value_network.load_state_dict(value_network.state_dict())
value_network.share_memory()
target_value_network.share_memory()

counter = mp.Value('i', 0)
lock = mp.Lock()
optimizer = SharedAdam(value_network.parameters())
# optimizer.share_memory()

processes = []
for idx in range(0, args.num_processes):
    trainingArgs = (idx, args, value_network, target_value_network,
                    optimizer, lock, counter)
    p = mp.Process(target=train, args=trainingArgs)
    p.start()
    processes.append(p)
# Use this script to handle arguments and
# initialize important components of your experiment.
# These might include important parameters for your experiment, and initialization of
# your models, torch's multiprocessing methods, etc.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_processes', type=int, default=8)
    parser.add_argument('--max_timestep', type=int, default=500)
    parser.add_argument('--iterate_target', type=int, default=500)
    parser.add_argument('--iterate_async', type=int, default=20)
    parser.add_argument('--discountFactor', type=float, default=0.99)
    parser.add_argument('--epsilon', type=float, default=1.)
    args = parser.parse_args()

    target_value_network = ValueNetwork(15, [15, 15], 4)

    # Example on how to initialize global locks for processes
    # and counters.
    counter = mp.Value('i', 0)
    lock = mp.Lock()
    processes = []

    # Example code to initialize torch multiprocessing.
    for idx in range(0, args.num_processes):
        evaluateArgs = (idx, target_value_network, lock, counter)
        p = mp.Process(target=evaluate, args=evaluateArgs)
        p.start()
        processes.append(p)
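# Several launchers above are cut off before they wait on their workers;
# the usual closing step (visible commented out in one of the snippets)
# is to join every spawned process:
for p in processes:
    p.join()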