예제 #1
0
    def __init__(self, args):
        """Create the worker pool and build the initial population.

        Args:
            args: Configuration object. This method reads ``worker`` (passed
                to ``multiprocessing.Pool``), ``env_name``, ``population``
                and ``step_per_generation`` from it.

        After construction ``self.population_status`` is a list with one dict
        per individual, holding the keys ``'name'``, ``'env_name'``,
        ``'policy_net'``, ``'evolution_net'`` and ``'steps'``.
        """
        self.args = args
        self.pool = multiprocessing.Pool(args.worker)

        # The environment is only needed to size the networks, so it is
        # closed as soon as the dimensions are read instead of being leaked.
        env = gym.make(args.env_name)
        try:
            # Reads the first axis of the observation shape and the ``n``
            # attribute of the action space (assumes a discrete action
            # space -- TODO confirm for the environments in use).
            o_dim = env.observation_space.shape[0]
            a_dim = env.action_space.n
        finally:
            env.close()
        # Passed as the second constructor argument of both networks.
        s_dim = 128

        # Hoisted: the candidate characters do not change between draws.
        alphabet = string.ascii_letters + string.digits

        self.population_status = []
        for _ in range(args.population):
            # Random 8-character alphanumeric identifier for this individual.
            name = ''.join(random.choice(alphabet) for _ in range(8))

            policy_net = PolicyNet(o_dim, s_dim, a_dim)
            # presumably so pool workers see the same parameters -- verify
            policy_net.share_memory()

            evaluate_net = EvaluateNet(o_dim, s_dim, a_dim)
            evaluate_net.share_memory()

            self.population_status.append({
                'name': name,
                'env_name': self.args.env_name,
                'policy_net': policy_net,
                # NOTE(review): the key says 'evolution_net' but the value is
                # the EvaluateNet; kept unchanged since readers of this dict
                # elsewhere may depend on the key.
                'evolution_net': evaluate_net,
                'steps': self.args.step_per_generation,
            })
예제 #2
0
if __name__ == "__main__":
    writer = SummaryWriter("./log")

    env = gym.make("Pong-v0")
    MAXSTEP = 6
    NWORKERS = 4
    EPOCHSTEP = 4000 * 1024 // (MAXSTEP * BATCHSIZE * NWORKERS
                                )  # around ~4000 1 EPOCH in A3C paper
    print("1 epoch contains {} steps".format(EPOCHSTEP))
    NEPOCH = 100 * EPOCHSTEP
    GAMMA = 0.99
    NFRAMES = 4

    policy_net = PolicyNet(NFRAMES)
    policy_net.cuda()
    policy_net.share_memory()  # make it store in shared memory
    opt = optim.RMSprop(policy_net.parameters(), lr=5e-4, alpha=0.99, eps=1e-5)

    samplers = [
        EnvSampler(env, policy_net, NFRAMES, MAXSTEP, GAMMA)
        for _ in range(NWORKERS)
    ]
    global_step = 0

    ctx = mp.get_context('spawn')
    queue = ctx.Queue()
    event = ctx.Event()

    workers = []
    for i in range(NWORKERS):
        worker = ctx.Process(target=sample,