def __init__(self, args):
    """Build the evolutionary population for training.

    Creates a worker pool, probes the Gym environment once to discover its
    observation/action dimensions, then builds one status dict per
    individual, each holding shared-memory policy and evaluate networks.

    Args:
        args: configuration namespace; this method reads ``args.worker``,
            ``args.env_name``, ``args.population`` and
            ``args.step_per_generation``.
    """
    self.args = args
    # Process pool presumably used elsewhere for parallel evaluation.
    self.pool = multiprocessing.Pool(args.worker)

    # Probe the environment only to read its dimensions, then release it
    # (the original leaked this env by never closing it).
    env = gym.make(args.env_name)
    o_dim = env.observation_space.shape[0]
    a_dim = env.action_space.n
    env.close()
    s_dim = 128  # hidden-layer width: fixed architecture choice

    # Loop-invariant values hoisted out of the population loop.
    env_name = self.args.env_name
    steps = self.args.step_per_generation

    self.population_status = []
    for _ in range(args.population):
        # Random 8-character alphanumeric identifier for this individual.
        name = ''.join(
            random.choice(string.ascii_letters + string.digits)
            for _ in range(8))

        policy_net = PolicyNet(o_dim, s_dim, a_dim)
        policy_net.share_memory()  # make parameters visible across processes

        evaluate_net = EvaluateNet(o_dim, s_dim, a_dim)
        evaluate_net.share_memory()

        # NOTE(review): key is 'evolution_net' but it stores an EvaluateNet;
        # kept byte-identical because other code may look up this exact key.
        self.population_status.append({
            'name': name,
            'env_name': env_name,
            'policy_net': policy_net,
            'evolution_net': evaluate_net,
            'steps': steps,
        })
if __name__ == "__main__": writer = SummaryWriter("./log") env = gym.make("Pong-v0") MAXSTEP = 6 NWORKERS = 4 EPOCHSTEP = 4000 * 1024 // (MAXSTEP * BATCHSIZE * NWORKERS ) # around ~4000 1 EPOCH in A3C paper print("1 epoch contains {} steps".format(EPOCHSTEP)) NEPOCH = 100 * EPOCHSTEP GAMMA = 0.99 NFRAMES = 4 policy_net = PolicyNet(NFRAMES) policy_net.cuda() policy_net.share_memory() # make it store in shared memory opt = optim.RMSprop(policy_net.parameters(), lr=5e-4, alpha=0.99, eps=1e-5) samplers = [ EnvSampler(env, policy_net, NFRAMES, MAXSTEP, GAMMA) for _ in range(NWORKERS) ] global_step = 0 ctx = mp.get_context('spawn') queue = ctx.Queue() event = ctx.Event() workers = [] for i in range(NWORKERS): worker = ctx.Process(target=sample,