Example no. 1
def create_network(num_layers=2, hidden_size=25):
    network = ValueNetwork(input_d=input_d,
                           output_d=actions_n,
                           num_layers=num_layers,
                           hidden_size=hidden_size)
    network.share_memory()
    return network
Example no. 2
    def load(weight_name,
             memory_name,
             inp_size,
             hidden,
             possible_action_function,
             optimizer,
             eps=0.3,
             reg_coef=0.01):
        net = ValueNetwork(inp_size, hidden, reg_coef=reg_coef)
        net.load_weights(weight_name)

        # Use a context manager so the pickle file is closed after loading.
        with open(memory_name + '.pickle', 'rb') as file:
            memory = pickle.load(file)

        return ValueAgent(net,
                          possible_action_function,
                          optimizer,
                          memory=memory,
                          eps=eps)
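
    # Hypothetical usage of the load() factory above; the file names, sizes
    # and optimizer below are placeholders, not values from the original.
    # agent = ValueAgent.load('value_net_weights', 'replay_memory',
    #                         inp_size=15, hidden=[15, 15],
    #                         possible_action_function=get_possible_actions,
    #                         optimizer=torch.optim.Adam, eps=0.3)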
# These might include important parameters for your experiment,
# your models, torch's multiprocessing methods, etc.
if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('--port', type=int, default=2648, help="Base server port")
    parser.add_argument('--seed', type=int, default=2207,
                        help="Python randomization seed; uses python default if 0 or not given")
    parser.add_argument('--num_processes', type=int, default=4)
    parser.add_argument('--num_episodes', type=int, default=20000)
    parser.add_argument('--name', type=str, default="", help="name to be used for experiment logs")

    args = parser.parse_args()

    # make the shared networks
    value_network = ValueNetwork()
    target_network = ValueNetwork()

    value_network.share_memory()
    target_network.share_memory()

    # Example on how to initialize global locks for processes
    # and counters.

    counter = mp.Value('i', 0)
    lock = mp.Lock()

    processes = []

    # how to asynchronously call multiple instances of train
    for idx in range(0, args.num_processes):
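        # Hedged sketch of the loop body (cut off in this excerpt), following
        # the spawning pattern used in the later examples; it assumes a shared
        # optimizer and a train() worker with this argument signature.
        trainingArgs = (idx, args, value_network, target_network,
                        optimizer, lock, counter)
        p = mp.Process(target=train, args=trainingArgs)
        p.start()
        processes.append(p)

    # Wait for all worker processes to finish.
    for p in processes:
        p.join()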
Example no. 4
# Use this script to handle arguments and
# initialize important components of your experiment.
# These might include important parameters for your experiment, and initialization of
# your models, torch's multiprocessing methods, etc.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_processes', type=int, default=8)
    parser.add_argument('--max_timestep', type=int, default=500)
    parser.add_argument('--iterate_target', type=int, default=500)
    parser.add_argument('--iterate_async', type=int, default=20)
    parser.add_argument('--discountFactor', type=float, default=0.99)
    parser.add_argument('--epsilon', type=float, default=1.)
    args = parser.parse_args()

    value_network = ValueNetwork(15, [15, 15], 4)
    saveModelNetwork(value_network, 'params/params_last')
    target_value_network = ValueNetwork(15, [15, 15], 4)
    optimizer = SharedAdam(value_network.parameters())
    optimizer.share_memory()

    # Example on how to initialize global locks for processes
    # and counters.
    counter = mp.Value('i', 0)
    lock = mp.Lock()

    processes = []

    # Example code to initialize torch multiprocessing.
    for idx in range(0, args.num_processes):
        trainingArgs = (idx, args, value_network, target_value_network,
Example no. 5
# Use this script to handle arguments and
# initialize important components of your experiment.
# These might include important parameters for your experiment,
# your models, torch's multiprocessing methods, etc.
if __name__ == "__main__":

    # Example on how to initialize global locks for processes
    # and counters.

    counter = mp.Value('i', 0)
    lock = mp.Lock()

    args = parser.parse_args()
    torch.manual_seed(args.seed)
    value_network, target_value_network = ValueNetwork(), ValueNetwork()
    # (params, lr, betas, eps, weight_decay)
    processes = []
    value_network.share_memory()
    target_value_network.share_memory()

    for idx in range(0, args.numprocesses):
        lr = args.lr[idx]
        optimizer = SharedAdam(value_network.parameters(), lr=lr)
        trainingArgs = (idx, args, value_network, target_value_network,
                        optimizer, lock, counter)
        p = mp.Process(target=train, args=trainingArgs)
        p.start()
        processes.append(p)

        args.port += 10
Example no. 6
    # p = mp.Process(target=train, args=())
    # p.start()
    # processes.append(p)
    # for p in processes:
    #     p.join()

    args = parser.parse_args()

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print("Using", device)

    torch.manual_seed(args.seed)
    mp.set_start_method('spawn')

    value_network = ValueNetwork().to(device)
    value_network.share_memory()

    optimizer = SharedAdam(value_network.parameters(), lr=1e-4)

    counter = mp.Value('i', 0)
    lock = mp.Lock()

    I_tar = 10
    I_async = 5

    processes = []
    name = ""  #str(datetime.now()) + "_"
    for idx in range(0, args.num_processes):

        target_value_network = ValueNetwork().to(device)
    # Example on how to initialize global locks for processes
    # and counters.

    numOpponents = 1
    args = parser.parse_args()
    results_dir = './Network_parameters'
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    counter = mp.Value('i', 0)

    lock = mp.Lock()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    val_network = ValueNetwork()
    val_network.share_memory()

    target_value_network = ValueNetwork()
    target_value_network.share_memory()

    hard_copy(val_network, target_value_network)

    # Shared optimizer -> shares the gradients between the processes. Allocation
    # is lazy, so the gradients are not shared at this point (see the SharedAdam
    # sketch after this example).
    optimizer = SharedAdam(params=val_network.parameters(), lr=args.lr)
    optimizer.share_memory()
    optimizer.zero_grad()
    timesteps_per_process = 32 * (10**6) // args.num_processes

    processes = []
    for idx in range(0, args.num_processes):
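# Hedged sketch (not part of the excerpt above): one common way to implement
# the SharedAdam referenced here, adapted from the widely used pytorch-a3c
# pattern. Adam's per-parameter state is allocated eagerly and moved into
# shared memory, so every worker process updates the same exp_avg /
# exp_avg_sq buffers instead of relying on PyTorch's default lazy allocation.
import torch


class SharedAdam(torch.optim.Adam):
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=0):
        super().__init__(params, lr=lr, betas=betas, eps=eps,
                         weight_decay=weight_decay)
        # Allocate the per-parameter state up front instead of lazily.
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = torch.zeros(1)
                state['exp_avg'] = torch.zeros_like(p.data)
                state['exp_avg_sq'] = torch.zeros_like(p.data)

    def share_memory(self):
        # Move the state tensors into shared memory; called once in the
        # parent process before the workers are spawned.
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'].share_memory_()
                state['exp_avg'].share_memory_()
                state['exp_avg_sq'].share_memory_()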
Example no. 8
                    type=float,
                    default=1e-4,
                    help='optimizer learning rate')
# parser.add_argument('--max_grads', type=float, default=1.0, help='max_grads')
parser.add_argument('--gamma', type=float, default=0.999, help='gamma')
parser.add_argument('--copy_freq', type=int, default=10000, help='copy_freq')
parser.add_argument('--aSyncFreq', type=int, default=10, help='aSyncFreq')
parser.add_argument('--numEpisodes',
                    type=int,
                    default=8000,
                    help='Number of episodes')

if __name__ == "__main__":
    args = parser.parse_args()

    value_network = ValueNetwork(15, [60, 60, 30], 4)
    value_network.share_memory()
    target_value_network = ValueNetwork(15, [60, 60, 30], 4)
    target_value_network.share_memory()

    print('lr', args.learning_rate)
    optimizer = SharedAdam(params=value_network.parameters(),
                           lr=args.learning_rate)
    optimizer.share_memory()

    counter = mp.Value('i', 0)

    lock = mp.Lock()
    processes = []
    for idx in range(0, args.num_processes):
        trainingArgs = (idx, args, value_network, target_value_network,
Example no. 9

if __name__ == "__main__":
    # Example on how to initialize global locks for processes
    # and counters.
    mp.set_start_method('spawn')
    args = get_args()
    if not os.path.exists(args.log_dir):
        os.mkdir(args.log_dir)

    if args.mode == 'train':
        num_processes = args.n_jobs
        counter = mp.Value('i', 0)
        lock = mp.Lock()

        valueNetwork = ValueNetwork()
        targetNetwork = ValueNetwork()

        if args.use_gpu:
            targetNetwork = targetNetwork.cuda()
            valueNetwork = valueNetwork.cuda()

        targetNetwork.load_state_dict(valueNetwork.state_dict())
        targetNetwork.eval()

        valueNetwork.share_memory()
        targetNetwork.share_memory()

        # create Shared Adam optimizer
        optimizer = SharedAdam(valueNetwork.parameters())
        optimizer.share_memory()
            total_steps += max_episode_length

    steps_per_goal = total_steps / num_episodes
    if (episode % 10) == 0:
        print(
            'Episode %d\tReal goals: %d/%d\tSteps: %d\tSteps per episode: %.1f'
            %
            (episode, total_goals, num_episodes, total_steps, steps_per_goal))

    f.write('Real goals: %d/%d\tSteps: %d\tSteps per episode: %.1f\n' %
            (total_goals, num_episodes, total_steps, steps_per_goal))
    f.flush()
    f.close()


value_network = ValueNetwork()

idx = 12
port = 8000 + idx * 20
seed = idx * 2

directories = [x[0] for x in os.walk('.') if 'results' in x[0]]

print('starting HFO')
hfoEnv = HFOEnv(numTeammates=0, numOpponents=1, port=port, seed=seed)
hfoEnv.connectToServer()

for i in range(4, 8):
    fn = 'params_' + str(i)
    print('Loading from ', fn)
    value_network.load_state_dict(torch.load(fn))
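    # Hedged sketch of an evaluation rollout for each loaded checkpoint; the
    # greedy-action loop, the episode count and the goal status code are
    # assumptions, not code from the original excerpt.
    goals, steps = 0, 0
    num_eval_episodes = 100
    for episode in range(num_eval_episodes):
        obs_tensor = hfoEnv.reset()
        done = False
        while not done:
            with torch.no_grad():
                action_idx = int(value_network(obs_tensor).argmax())
            action = hfoEnv.possibleActions[action_idx]
            obs_tensor, reward, done, status, info = hfoEnv.step(action)
            steps += 1
            if status == 1:  # goal scored
                goals += 1
    print(fn, 'goals:', goals, 'avg steps:', steps / num_eval_episodes)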
Example no. 11
def train(idx, args, learning_network, target_network, optimizer, lock,
          counter):
    # init port & seed for the thread based on id val
    port = 8100 + 10 * idx  # init
    seed = idx * 113 + 923
    torch.manual_seed(seed)
    worker_network = ValueNetwork(15, 4)
    worker_network.load_state_dict(learning_network.state_dict())
    # change init
    # init env
    hfo_env = HFOEnv(numTeammates=0, numOpponents=1, port=port, seed=seed)
    hfo_env.connectToServer()
    episode_num = 0
    eps = random.sample(epsilon_list, 1)[0]
    worker_timestep = 0
    mse_loss = nn.MSELoss()
    max_worker_steps = args.max_steps / args.num_processes
    can_continue = True
    goal = 0
    to_goal = []
    while can_continue:
        # run episode
        obs_tensor = hfo_env.reset()
        done = False
        loss = 0
        reward_ep = 0
        ep_steps = 0
        upd_steps = 0
        while not done:
            # select action based on greedy policy
            action_idx = select_action(obs_tensor, worker_network,
                                       worker_timestep, max_worker_steps, args,
                                       eps)
            action = hfo_env.possibleActions[action_idx]
            # observe next
            next_obs_tensor, reward, done, status, info = hfo_env.step(action)
            y = computeTargets(reward, next_obs_tensor, args.discount, done,
                               target_network)
            # compute predictions again for the best action. Here we change params
            q_next = computePrediction(obs_tensor, action_idx, worker_network)
            # put new state
            obs_tensor = next_obs_tensor
            # update episode stats
            loss += mse_loss(y, q_next)
            reward_ep += reward
            upd_steps += 1
            ep_steps += 1
            worker_timestep += 1
            if status == 1:
                goal += 1
                to_goal.append(ep_steps)
            with lock:
                counter.value += 1
                if counter.value % args.checkpoint_time == 0:
                    saveModelNetwork(
                        learning_network,
                        args.checkpoint_dir + '_{}'.format(counter.value))
            # if terminal or time to update network
            if done or worker_timestep % args.val_net_update_freq == 0:
                worker_network.zero_grad()
                optimizer.zero_grad()
                # take mean loss
                loss /= upd_steps
                loss.backward()
                sync_grad(learning_network, worker_network)
                optimizer.step()
                worker_network.load_state_dict(learning_network.state_dict())
                loss = 0
                upd_steps = 0
            # perform update of target network
            if counter.value % args.tgt_net_update_freq == 0:
                target_network.load_state_dict(learning_network.state_dict())

            if counter.value % args.checkpoint_time == 0:
                saveModelNetwork(
                    learning_network,
                    args.checkpoint_dir + '_{}'.format(counter.value))
        episode_num += 1

        # if time is exceeded -> break the loop
        can_continue = counter.value <= args.max_steps \
                       and worker_timestep <= max_worker_steps \
                       and status != SERVER_DOWN \
                       and episode_num <= args.num_episodes  # let's run just 8K episodes
    # finish the game
    hfo_env.quitGame()
    # save the network it stopped with
    saveModelNetwork(learning_network,
                     args.checkpoint_dir + '_{}_final'.format(counter.value))
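

# Hedged sketch of the sync_grad helper used above (not shown in the
# excerpt): copy the worker's gradients onto the shared learning network
# before optimizer.step(), assuming both networks have identical
# architectures so their parameters zip together one-to-one.
def sync_grad(learning_network, worker_network):
    for shared_param, worker_param in zip(learning_network.parameters(),
                                          worker_network.parameters()):
        if worker_param.grad is not None:
            shared_param.grad = worker_param.grad.clone()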
Example no. 12
                    help='how many batches to wait before logging training status')
	parser.add_argument('--num-processes', type=int, default=8)
	parser.add_argument('--discountFactor', type=float, default=0.9)
	parser.add_argument('--target-interval', type=int, default=2000)
	parser.add_argument('--predict-interval', type=int, default=250)
	parser.add_argument('--timesteps', type=int, default=10000000)
	parser.add_argument('--tmax', type=float, default=32e6)
	parser.add_argument('--epsilon', type=float, default=0.9)
	parser.add_argument('--save-interval', type=int, default=100000)

	args = parser.parse_args()
	mp.set_start_method('spawn')
	value_network = ValueNetwork(68, [15, 15], 4)
	target_value_network = ValueNetwork(68, [15, 15], 4)
	target_value_network.load_state_dict(value_network.state_dict())
	value_network.share_memory()
	target_value_network.share_memory()
	counter = mp.Value('i', 0)
	lock = mp.Lock()
	optimizer = SharedAdam(value_network.parameters())
	#optimizer.share_memory()
	processes = []
	rank = 0
	for idx in range(0, args.num_processes):
		trainingArgs = (idx, args, value_network, target_value_network, optimizer, lock, counter)
		p = mp.Process(target=train, args=trainingArgs)
		rank += 1
		p.start()
Example no. 13
# Use this script to handle arguments and
# initialize important components of your experiment.
# These might include important parameters for your experiment, and initialization of
# your models, torch's multiprocessing methods, etc.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--num_processes', type=int, default=8)
    parser.add_argument('--max_timestep', type=int, default=500)
    parser.add_argument('--iterate_target', type=int, default=500)
    parser.add_argument('--iterate_async', type=int, default=20)
    parser.add_argument('--discountFactor', type=float, default=0.99)
    parser.add_argument('--epsilon', type=float, default=1.)
    args = parser.parse_args()

    target_value_network = ValueNetwork(15, [15, 15], 4)

    # Example on how to initialize global locks for processes
    # and counters.
    counter = mp.Value('i', 0)
    lock = mp.Lock()

    processes = []

    # Example code to initialize torch multiprocessing.
    for idx in range(0, args.num_processes):
        evaluateArgs = (idx, target_value_network, lock, counter)
        p = mp.Process(target=evaluate, args=evaluateArgs)
        p.start()
        processes.append(p)