Example #1
# hyperparameters
num_state_features = env.observation_space.shape[0]
num_actions = env.action_space.n
learning_rate = 5e-4
num_iterations = 10000
memory_size = 10000
batch_size = 1024
update_target_net_every = 5

# hyperparameters for the epsilon-greedy policy
min_e = .01
max_e = .6
e_decay = 5000.0

# define brain
brain = Agent(num_state_features, num_actions, memory_size).cuda()
brain_prime = Agent(num_state_features, num_actions, memory_size).cuda()
brain_prime.net.load_state_dict(brain.net.state_dict())

optimizer = torch.optim.RMSprop(brain.net.parameters(), lr=learning_rate)


def main():
    eps_step_tracker = 0
    loss_tracker = AverageMeter()
    for i in range(num_iterations):
        state = env.reset()
        ep_length = 0
        ep_reward = 0
        while True:
            # env.render()
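The example is cut off inside the episode loop. A minimal sketch of how the loop body could continue, using the hyperparameters defined above (a linear epsilon decay between max_e and min_e, and a target-network sync every update_target_net_every iterations); the Agent methods act, store and learn are assumed names, not taken from the original code.

            # assumed linear epsilon decay from max_e down to min_e
            epsilon = max(min_e, max_e - (max_e - min_e) * eps_step_tracker / e_decay)
            eps_step_tracker += 1

            # assumed Agent API: act / store / learn
            action = brain.act(state, epsilon)
            next_state, reward, done, _ = env.step(action)
            brain.store(state, action, reward, next_state, done)
            loss = brain.learn(brain_prime, optimizer, batch_size)
            loss_tracker.update(loss)

            state = next_state
            ep_reward += reward
            ep_length += 1
            if done:
                break

        # sync the target network every update_target_net_every iterations
        if i % update_target_net_every == 0:
            brain_prime.net.load_state_dict(brain.net.state_dict())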
Example #2
import sys
import os
sys.path.append('../Prim-Agent')
sys.path.append('../demo')
from environment import Environment
from network import Net, Agent
from utils import utils


os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# initialize
env = Environment()
agent = Agent('eval/prim.pth')

# save path
save_path = 'prim_result/'
utils.check_dirs([save_path])

shape_infopack=['demo', 'eval/demo-16.binvox', 'eval/demo-64.binvox', 'rgb', 'demo.png']
s, box, step = env.reset(shape_infopack)
acm_r = 0

step_interval = 20

while True:

    valid_mask = env.get_valid_action_mask(box)
    a = agent.choose_action(s, box, step, valid_mask, 1.0)
    s_, box_, step_, r, done = env.next(a)
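The loop is truncated right after the environment step. A plausible continuation, accumulating the reward in acm_r and stopping when the episode ends; step_interval is presumably used for periodic output of intermediate results, which is omitted here.

    acm_r += r
    s, box, step = s_, box_, step_

    if done:
        print('accumulated reward:', acm_r)
        break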
                
Example #3
    return np.mean(all_reward)


if __name__ == "__main__":

    args = parse_args()

    shape_ref_type = args.reference
    shape_category = args.category
    shape_ref_path = args.data_root + 'shape_reference/' + shape_ref_type + '/' + shape_category + '/'
    shape_vox_path = args.data_root + 'shape_binvox/' + shape_category + '/'

    load_net_path = args.load_net + shape_ref_type + '/' + shape_category + '.pth'
    test_shapelist_path = args.data_root + 'shape_list/' + shape_category + '/' + shape_category + '-test.txt'

    save_result_path = args.save_result + shape_ref_type + '/' + shape_category + '/'
    utils.check_dirs([save_result_path])

# GPU
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

# shape list
test_shapelist = utils.load_filelist(test_shapelist_path)

# initialize
    env = Environment()
    agent = Agent(load_net_path)
    mean_reward = test(agent, env, test_shapelist)

    print(mean_reward)
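Only the last line of the test function (return np.mean(all_reward)) survives in this excerpt. A minimal sketch of what such a function could look like, reusing the env.reset / get_valid_action_mask / choose_action / next calls seen in Example #2; how each entry of the shape list is turned into an infopack is an assumption.

def test(agent, env, shape_list):
    # hypothetical reconstruction; only the final return statement is original
    all_reward = []
    for shape_infopack in shape_list:
        s, box, step = env.reset(shape_infopack)
        acm_r = 0
        while True:
            valid_mask = env.get_valid_action_mask(box)
            a = agent.choose_action(s, box, step, valid_mask, 1.0)
            s_, box_, step_, r, done = env.next(a)
            acm_r += r
            s, box, step = s_, box_, step_
            if done:
                break
        all_reward.append(acm_r)
    return np.mean(all_reward)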
Example #4
num_md = 15
num_task = 15
num_bs = 3

env = ENV(num_md, num_task, num_bs)

# parameters??
# UEnet = Agent(alpha=0.0005, beta=0.005, input_dims=8, tau=0.01, \
#               env=None, batch_size=64, layer1_size=500, layer2_size=300,
#               n_actions=1)
MECSnet = Agent(alpha=0.0004,
                beta=0.004,
                input_dims=num_task * 3 + num_bs,
                tau=0.01,
                env=env,
                batch_size=64,
                layer1_size=500,
                layer2_size=300,
                n_actions=3)

# key hyperparameters: alpha, beta, tau, batch_size

score_record = []
score_record_step = []
count_record = []
count_record_step = []
time_record = []
time_record_step = []
for i in range(800):
    done = False
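The training loop is cut off right after done = False. A minimal sketch of a typical DDPG-style episode body, assuming the custom ENV follows the usual reset/step interface and the Agent exposes choose_action, remember and learn (none of these names are taken from the original snippet):

    obs = env.reset()
    score = 0
    while not done:
        action = MECSnet.choose_action(obs)
        obs_, reward, done, info = env.step(action)
        MECSnet.remember(obs, action, reward, obs_, int(done))
        MECSnet.learn()
        score += reward
        obs = obs_
    score_record.append(score)
    score_record_step.append(i)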
Example #5
    save_net_f = args.save_net_f
    save_log_path = args.save_log
    save_tmp_result_path = args.save_tmp_result

    utils.check_dirs([save_net_path, save_log_path, save_tmp_result_path])

# GPU
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

# shape list
IL_shape_list = utils.load_filelist(IL_shapelist_path)
RL_shape_list = utils.load_filelist(RL_shapelist_path)

# initialize
    env = Environment()
    agent = Agent()
    writer = SummaryWriter(save_log_path)

    imitation_learning(agent, env, writer, IL_shape_list)
    torch.save(
        agent.eval_net.state_dict(), save_net_path + 'eval_IL_' +
        shape_ref_type + '_' + shape_category + '.pth')
    torch.save(
        agent.target_net.state_dict(), save_net_path + 'target_IL_' +
        shape_ref_type + '_' + shape_category + '.pth')
    agent.memory_self.clear()

    reinforcement_learning(agent, env, writer, RL_shape_list)
    torch.save(
        agent.eval_net.state_dict(), save_net_path + 'eval_RL_' +
        shape_ref_type + '_' + shape_category + '.pth')
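The excerpt ends mid-save; by symmetry with the imitation-learning block above, the RL target network would presumably be saved the same way:

    torch.save(
        agent.target_net.state_dict(), save_net_path + 'target_RL_' +
        shape_ref_type + '_' + shape_category + '.pth')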
Example #6
gym.logger.set_level(gym.logger.ERROR)  # Disable Gym warnings

counter = Counter()

tf.set_random_seed(args.seed)
sess = tf.Session()

with tf.device('/cpu:0'):
    global_net = ActorCriticNet(args, 'global_net', sess)
    average_net = ActorCriticNet(args, 'average_net', sess)
    test_net = ActorCriticNet(args, 'test', sess)
    agents = []
    for i in range(args.num_agents):
        agent_name = 'Agent_%i' % i
        agents.append(Agent(args, i, agent_name, global_net, average_net,
                            sess))

if args.output_graph:
    tf.summary.FileWriter("logs/", sess.graph)

coord = tf.train.Coordinator()
sess.run(tf.global_variables_initializer())
# initialize average network weights to global network weights
sess.run([
    tf.assign(a_p, g_p)
    for a_p, g_p in zip(average_net.a_params, global_net.a_params)
])


def job():
    agent.acer_main(counter, coord, average_net)
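As written, job() closes over an agent variable that is not defined in this excerpt; the worker threads are presumably created further down. A minimal sketch of the usual pattern with tf.train.Coordinator, passing each agent explicitly so that no loop variable is captured (the threading code is an assumption, not part of the original):

import threading

worker_threads = []
for worker in agents:
    t = threading.Thread(target=worker.acer_main,
                         args=(counter, coord, average_net))
    t.start()
    worker_threads.append(t)
coord.join(worker_threads)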
Example #7
from tictactoe import TicTacToe
from network import Agent
from player import Player
import pickle


# human player against the AI
def play(player0, player1):

    game = TicTacToe(printing=True)
    game.print_board()

    # play game
    while game.winner == -1:
        if game.player == 0:
            winner = game.play(player0.get_input())
        else:
            winner = game.play(player1.get_input(board=game.board))
    return winner


if __name__ == '__main__':
    agent = input('Enter the name of the file to play:\n')
    agent = Agent.load(f'players/{agent}')
    player0 = Player()
    player1 = Player(agent=agent)

    replay = 'y'
    while replay == 'y':
        print(play(player0, player1), 'won the game.')
        replay = input('Play again?\ny/n\n')
Example #8
#from utils import plotLearning

if __name__ == "__main__":
    # hyperparameters
    state_dim = 115
    # number of possible actions
    action_dim = 19
    # learning rates for the actor and critic
    alpha = 0.0001
    beta = 0.0005
    # discount factor
    gamma = .95
    # number of episodes to test
    n_episodes = 20

    # instantiate the agent
    agent = Agent(state_dim, action_dim, alpha, beta, gamma)
    # load the trained model
    agent.actor = torch.load('PPO_act6000.pth')
    display_train = False
    # render environment
    if display_train:
        env = football_env.create_environment(env_name='academy_empty_goal',
                                              representation='pixels',
                                              render=True)
    else:
        env = football_env.create_environment(env_name='academy_empty_goal',
                                              representation='simple115',
                                              render=True)
    # append the scores
    score_history = []
    # testing pipeline
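The excerpt stops at the testing-pipeline comment. A minimal sketch of how the test loop over n_episodes might look, assuming the Agent exposes a choose_action method and the Football environment follows the standard Gym step interface:

    for episode in range(n_episodes):
        obs = env.reset()
        done = False
        score = 0
        while not done:
            action = agent.choose_action(obs)
            obs, reward, done, info = env.step(action)
            score += reward
        score_history.append(score)
        print('episode', episode, 'score %.1f' % score)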