Example #1
def local_test(index, opt, global_model):
    torch.manual_seed(123 + index)
    env, num_states, num_actions = create_train_env(opt.world, opt.stage, opt.action_type)
    local_model = ActorCritic(num_states, num_actions)
    local_model.eval()
    state = torch.from_numpy(env.reset())
    done = True
    curr_step = 0
    actions = deque(maxlen=opt.max_actions)
    while True:
        curr_step += 1
        if done:
            local_model.load_state_dict(global_model.state_dict())
        with torch.no_grad():
            if done:
                h_0 = torch.zeros((1, 512), dtype=torch.float)
                c_0 = torch.zeros((1, 512), dtype=torch.float)
            else:
                h_0 = h_0.detach()
                c_0 = c_0.detach()

        logits, value, h_0, c_0 = local_model(state, h_0, c_0)
        policy = F.softmax(logits, dim=1)
        action = torch.argmax(policy).item()
        state, reward, done, _ = env.step(action)
        env.render()
        actions.append(action)
        if curr_step > opt.num_global_steps or actions.count(actions[0]) == actions.maxlen:
            done = True
        if done:
            curr_step = 0
            actions.clear()
            state = env.reset()
        state = torch.from_numpy(state)
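local_test above is written as a standalone evaluation worker that runs next to the training processes. A minimal launch sketch (assuming a shared global_model, an opt namespace, and local_test already exist, mirroring the shared_learn example at the end of this page):

import torch.multiprocessing as _mp

# Minimal launch sketch; global_model, opt, and local_test are assumed to exist.
mp = _mp.get_context("spawn")
global_model.share_memory()                 # share weights with the evaluation worker
process = mp.Process(target=local_test,
                     args=(opt.num_processes, opt, global_model))
process.start()
process.join()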
def test(opt):
    torch.manual_seed(123)
    env, num_states, num_actions = create_train_env(opt.layout)#,"{}/video_{}.mp4".format(opt.output_path, opt.layout))
    model = ActorCritic(num_states, num_actions)
    if torch.cuda.is_available():
        model.load_state_dict(torch.load("{}/gym-pacman_{}".format(opt.saved_path,opt.layout)))
        model.cuda()
    else:
        model.load_state_dict(torch.load("{}/gym-pacman_{}".format(opt.saved_path, opt.layout),
                                         map_location=lambda storage, loc: storage))
    model.eval()
    state = torch.from_numpy(env.reset())
    done = True
    while True:
        if done:
            h_0 = torch.zeros((1, 512), dtype=torch.float)
            c_0 = torch.zeros((1, 512), dtype=torch.float)
            env.reset()
        else:
            h_0 = h_0.detach()
            c_0 = c_0.detach()
        if torch.cuda.is_available():
            h_0 = h_0.cuda()
            c_0 = c_0.cuda()
            state = state.cuda()

        logits, value, h_0, c_0 = model(state, h_0, c_0)
        policy = F.softmax(logits, dim=1)
        action = torch.argmax(policy).item()
        action = int(action)
        state, reward, done, info = env.step(action)
        state = torch.from_numpy(state)
        env.render()
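The Pac-Man test above only reads three attributes from opt: layout, saved_path, and (for the commented-out video export) output_path. A minimal argparse sketch with hypothetical defaults; the real project defines its own flags:

import argparse

def get_args():
    # Hypothetical flags and defaults inferred from the attributes used above.
    parser = argparse.ArgumentParser("Evaluate a trained gym-pacman A3C model")
    parser.add_argument("--layout", type=str, default="mediumClassic")
    parser.add_argument("--saved_path", type=str, default="trained_models")
    parser.add_argument("--output_path", type=str, default="output")
    return parser.parse_args()

if __name__ == "__main__":
    test(get_args())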
Example #3
def test(opt):
    torch.manual_seed(123)
    env, num_states, num_actions = create_train_env(opt.world, opt.stage, opt.action_type,
                                                    f"{opt.output_path}/video_{opt.world}_{opt.stage}.mp4")
    model = ActorCritic(num_states, num_actions)

    model.load_state_dict(torch.load(f"{opt.saved_path}/a3c_super_mario_bros_{opt.world}_{opt.stage}",
                                     map_location=lambda storage, loc: storage))
    model.eval()
    state = torch.from_numpy(env.reset())
    done = True

    while True:
        if done:
            h_0 = torch.zeros((1, 512), dtype=torch.float)
            c_0 = torch.zeros((1, 512), dtype=torch.float)
            env.reset()
        else:
            h_0 = h_0.detach()
            c_0 = c_0.detach()

        logits, value, h_0, c_0 = model(state, h_0, c_0)
        policy = F.softmax(logits, dim=1)
        action = torch.argmax(policy).item()
        action = int(action)
        state, reward, done, info = env.step(action)
        state = torch.from_numpy(state)
        env.render()

        if info["flag_get"]:
            print(f"World {opt.world} stage {opt.stage} completed")
            break
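All of the test loops on this page assume the same recurrent actor-critic interface: model(state, h_0, c_0) returns (logits, value, h_0, c_0), with a 512-unit LSTM cell in most examples. A minimal sketch of a module with that interface; the conv stack and the 84x84 input assumption are illustrative, not the exact layers of the referenced projects:

import torch
import torch.nn as nn
import torch.nn.functional as F

class ActorCritic(nn.Module):
    """Illustrative recurrent actor-critic matching the
    (state, h, c) -> (logits, value, h, c) call used in the tests above."""

    def __init__(self, num_states, num_actions):
        super().__init__()
        self.conv1 = nn.Conv2d(num_states, 32, 3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.lstm = nn.LSTMCell(32 * 6 * 6, 512)   # 512 matches the (1, 512) hidden states
        self.actor = nn.Linear(512, num_actions)   # policy logits
        self.critic = nn.Linear(512, 1)            # state-value estimate

    def forward(self, x, hx, cx):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        hx, cx = self.lstm(x.view(x.size(0), -1), (hx, cx))
        return self.actor(hx), self.critic(hx), hx, cx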
def test_a3c(index, args, A3C_shared_model, CAE_shared_model):

    # load the pretrained CAE weights into the shared model
    CAE_shared_model.load_state_dict(torch.load(
        args.pretrained_model_weights_path),
                                     strict=False)
    CAE_shared_model.eval()

    torch.manual_seed(123 + index)
    #instantiate the environment
    env, num_states, num_actions = build_environment(args.world, args.stage)
    # initialize the local CAE worker model
    CAE_local_model = Convolutional_AutoEncoder()
    #load weights
    CAE_local_model.load_state_dict(torch.load(
        args.pretrained_model_weights_path),
                                    strict=False)
    CAE_local_model.eval()
    #initialize A3C part of the model
    a3c_local_model = ActorCritic(num_states, num_actions)
    a3c_local_model.eval()
    state = torch.from_numpy(env.reset())
    done = True
    curr_step = 0
    actions = deque(maxlen=args.max_actions)
    # evaluation loop: an episode ends when the step budget expires or the agent gets stuck
    while True:
        curr_step += 1

        if done:
            a3c_local_model.load_state_dict(A3C_shared_model.state_dict())
        with torch.no_grad():
            if done:
                hx = torch.zeros((1, 512), dtype=torch.float)
                cx = torch.zeros((1, 512), dtype=torch.float)
            else:
                hx = hx.detach()
                cx = cx.detach()
        # feed the current state through the CAE
        outputs_cae = CAE_local_model(state)
        # feed the CAE output into the A3C model; in return we get the policy
        # logits, the value estimate, and the LSTM memories, then choose an action
        logits, value, hx, cx = a3c_local_model(outputs_cae, hx, cx)
        policy = F.softmax(logits, dim=1)
        action = torch.argmax(policy).item()
        #use the chosen action and make step
        state, reward, done, _ = env.step(action)
        env.render()
        actions.append(action)
        # end the episode if the step budget is exhausted or the agent keeps repeating the same action
        if curr_step > args.max_steps or actions.count(
                actions[0]) == actions.maxlen:
            done = True
        if done:
            curr_step = 0
            actions.clear()
            state = env.reset()
        state = torch.from_numpy(state)
def local_test(index, opt, global_model, start_time, curr_episode):
    info = {}
    info["flag_get"] = False
    torch.manual_seed(123 + index)
    env, num_states, num_actions = create_train_env(opt.world, opt.stage,
                                                    opt.action_type)
    local_model = ActorCritic(num_states, num_actions)
    local_model.eval()
    state = torch.from_numpy(env.reset())
    done = True
    curr_step = 0
    actions = deque(maxlen=opt.max_actions)
    while not info["flag_get"]:
        curr_step += 1
        if done:
            local_model.load_state_dict(global_model.state_dict())
        with torch.no_grad():
            if done:
                h_0 = torch.zeros((1, 512), dtype=torch.float)
                c_0 = torch.zeros((1, 512), dtype=torch.float)
            else:
                h_0 = h_0.detach()
                c_0 = c_0.detach()

        logits, value, h_0, c_0 = local_model(state, h_0, c_0)
        policy = F.softmax(logits, dim=1)
        action = torch.argmax(policy).item()
        state, reward, done, info = env.step(action)
        env.render()
        actions.append(action)
        if curr_step > opt.num_global_steps or actions.count(
                actions[0]) == actions.maxlen:
            done = True
        if done:
            curr_step = 0
            actions.clear()
            state = env.reset()
        state = torch.from_numpy(state)

        if info["flag_get"]:
            print("完成")
            end_time = timeit.default_timer()
            config_state = {
                'net': global_model.state_dict(),
                'curr_episode': curr_episode,
                'time': end_time - start_time,
            }

            torch.save(
                config_state,
                "{}/a3c_super_mario_bros_{}_{}".format(opt.saved_path,
                                                       opt.world, opt.stage))

            return True
        elif done:
            env.close()
            return False
Example #6
def test(opt):
    torch.manual_seed(123)
    env, num_states, num_actions = create_train_env(
        opt.world, opt.stage, opt.action_type,
        "{}/video_{}_{}.mp4".format(opt.output_path, opt.world, opt.stage))
    model = ActorCritic(num_states, num_actions)
    if torch.cuda.is_available():
        model_dict = torch.load("{}/a3c_super_mario_bros_{}_{}".format(
            opt.saved_path, opt.world, opt.stage))
        model.load_state_dict(model_dict['net'])
        model.cuda()
        print("episode", model_dict['curr_episode'])
        print("time", model_dict['time'])
    else:
        model_dict = torch.load("{}/a3c_super_mario_bros_{}_{}".format(
            opt.saved_path, opt.world, opt.stage),
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(model_dict['net'])
        print("episode", model_dict['curr_episode'])
        print("time", model_dict['time'])

    model.eval()
    state = torch.from_numpy(env.reset())
    done = True
    while True:
        if done:
            h_0 = torch.zeros((1, 512), dtype=torch.float)
            c_0 = torch.zeros((1, 512), dtype=torch.float)
            env.reset()
        else:
            h_0 = h_0.detach()
            c_0 = c_0.detach()
        if torch.cuda.is_available():
            h_0 = h_0.cuda()
            c_0 = c_0.cuda()
            state = state.cuda()

        logits, value, h_0, c_0 = model(state, h_0, c_0)
        policy = F.softmax(logits, dim=1)
        action = torch.argmax(policy).item()
        action = int(action)
        state, reward, done, info = env.step(action)
        state = torch.from_numpy(state)
        env.render()

        if info["flag_get"]:
            print("World {} stage {} completed".format(opt.world, opt.stage))
            break
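Every example on this page picks the greedy action with torch.argmax over a softmax. Since softmax is strictly monotone, the argmax of the raw logits gives the same action, so the softmax is only needed when actual probabilities are wanted (e.g., for sampling). A small illustration with made-up logits:

import torch
import torch.nn.functional as F

logits = torch.tensor([[1.5, -0.3, 2.2, 0.1]])             # made-up policy logits
greedy_from_probs = torch.argmax(F.softmax(logits, dim=1)).item()
greedy_from_logits = torch.argmax(logits, dim=1).item()    # same action, no softmax needed
assert greedy_from_probs == greedy_from_logits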
Example #7
File: test.py  Project: wuhoward/CCM_Final
def test(opt):
    #torch.manual_seed(123)
    if not os.path.isdir(opt.output_path):
        os.makedirs(opt.output_path)
    env, num_states, num_actions = create_train_env(1, opt, "{}/test.mp4".format(opt.output_path))
    model = ActorCritic(num_states, num_actions)
    if opt.use_gpu and torch.cuda.is_available():
        model.load_state_dict(torch.load("{}/a3c".format(opt.resume_path)))
        model.cuda()
    else:
        model.load_state_dict(torch.load("{}/a3c".format(opt.resume_path),
                                         map_location=lambda storage, loc: storage))
    model.eval()
    state = torch.from_numpy(env.reset(False, False, True))
    round_done, stage_done, game_done = False, False, True
    num_action = 0
    while True:
        if round_done or stage_done or game_done:
            h_0 = torch.zeros((1, 256), dtype=torch.float)
            c_0 = torch.zeros((1, 256), dtype=torch.float)
        else:
            h_0 = h_0.detach()
            c_0 = c_0.detach()
        if torch.cuda.is_available():
            h_0 = h_0.cuda()
            c_0 = c_0.cuda()
            state = state.cuda()

        logits, value, h_0, c_0 = model(state, h_0, c_0)
        policy = F.softmax(logits, dim=1)
        action = torch.argmax(policy).item()
        action = int(action)
        num_action += 1
        state, reward, round_done, stage_done, game_done = env.step(action)
        state = torch.from_numpy(state)
        if round_done or stage_done:
            state = torch.from_numpy(env.reset(round_done, stage_done, game_done))
        if game_done or num_action == opt.max_steps:
            env.make_anim()
            print("Game over")
            break
def test(opt):
    torch.manual_seed(123)
    env, num_states, num_actions = create_train_env(
        1, "{}/video.mp4".format(opt.output_path))
    model = ActorCritic(num_states, num_actions)
    if torch.cuda.is_available():
        model.load_state_dict(
            torch.load("{}/a3c_street_fighter".format(opt.saved_path)))
        model.cuda()
    else:
        model.load_state_dict(
            torch.load("{}/a3c_street_fighter".format(opt.saved_path),
                       map_location=lambda storage, loc: storage))
    model.eval()
    state = torch.from_numpy(env.reset(False, False, True))
    round_done, stage_done, game_done = False, False, True
    while True:
        if round_done or stage_done or game_done:
            h_0 = torch.zeros((1, 1024), dtype=torch.float)
            c_0 = torch.zeros((1, 1024), dtype=torch.float)
        else:
            h_0 = h_0.detach()
            c_0 = c_0.detach()
        if torch.cuda.is_available():
            h_0 = h_0.cuda()
            c_0 = c_0.cuda()
            state = state.cuda()

        logits, value, h_0, c_0 = model(state, h_0, c_0)
        policy = F.softmax(logits, dim=1)
        action = torch.argmax(policy).item()
        action = int(action)
        state, reward, round_done, stage_done, game_done = env.step(action)
        state = torch.from_numpy(state)
        if round_done or stage_done:
            state = torch.from_numpy(
                env.reset(round_done, stage_done, game_done))
        if game_done:
            print("Game over")
            break
def test(opt):
    viewer = rendering.SimpleImageViewer()
    viewer.width = 800 * 2
    viewer.height = 600 * 2
    #1920x1080
    viewer.window = pyglet.window.Window(width=viewer.width, height=viewer.height, resizable=True)
    
    torch.manual_seed(123)
    if opt.output_path is not None:
        env, num_states, num_actions = create_train_env(opt.world, opt.stage, opt.action_type,
                                                    "{}/video_{}_{}.mp4".format(opt.output_path, opt.world, opt.stage))
    else:
        env, num_states, num_actions = create_train_env(opt.world, opt.stage, opt.action_type,None)
    model = ActorCritic(num_states, num_actions)
    if torch.cuda.is_available():
        model.load_state_dict(torch.load("{}/a3c_super_mario_bros_{}_{}".format(opt.saved_path, opt.world, opt.stage)))
        model.cuda()
    else:
        model.load_state_dict(torch.load("{}/a3c_super_mario_bros_{}_{}".format(opt.saved_path, opt.world, opt.stage),
                                         map_location=lambda storage, loc: storage))
    model.eval()
    state = torch.from_numpy(env.reset())
    done = True
    max_x_pos = 0
    max_x_pos_counter = 0
    while True:
        if done:
            h_0 = torch.zeros((1, 512), dtype=torch.float)
            c_0 = torch.zeros((1, 512), dtype=torch.float)
            print('done')
            max_x_pos = 0
            max_x_pos_counter = 0
            env.reset()
            done = False
        else:
            h_0 = h_0.detach()
            c_0 = c_0.detach()
        if torch.cuda.is_available():
            h_0 = h_0.cuda()
            c_0 = c_0.cuda()
            state = state.cuda()

        logits, value, h_0, c_0 = model(state, h_0, c_0)
        policy = F.softmax(logits, dim=1)
        action = torch.argmax(policy).item()
        action = int(action)
        state, reward, done, info = env.step(action)
        rgb = env.render('rgb_array')
        state = torch.from_numpy(state)
        
        viewer.imshow(rgb)
        if max_x_pos_counter < 50:
            time.sleep(0.06)
        if reward < 0:
            max_x_pos_counter += 1
        if max_x_pos_counter > 150:
            print('no progress, stopping')
            done = True
        
        if info["flag_get"]:
            print("World {} stage {} completed".format(opt.world, opt.stage))
            done = True
            copyfile("{}/a3c_super_mario_bros_{}_{}".format(opt.saved_path, opt.world, opt.stage),
                     "{}/a3c_super_mario_bros_{}_{}_{}".format(opt.saved_path, info["world"],
                                                               info["stage"], random.random()))
        print(reward, COMPLEX_MOVEMENT[action])
    print('done testing')
def shared_learn(args):

    os.environ['OMP_NUM_THREADS'] = '1'
    torch.manual_seed(123)
    # create path for logs
    if os.path.isdir(args.sum_path):
        shutil.rmtree(args.sum_path)
    os.makedirs(args.sum_path)
    if not os.path.isdir(args.trained_models_path):
        os.makedirs(args.trained_models_path)
    mp = _mp.get_context('spawn')

    # create initial mario environment
    env, num_states, num_actions = build_environment(args.world, args.stage)

    print('Num of states: {}'.format(num_states))  #4
    print('environment: {}'.format(env))
    print('Num of actions: {}'.format(num_actions))  #12

    # check if cuda is available else cpu
    device = torch.device('cuda' if (
        args.use_cuda and torch.cuda.is_available()) else 'cpu')

    CAE_shared_model = Convolutional_AutoEncoder()  #.to(device)
    A3C_shared_model = ActorCritic(num_states, num_actions)  #.to(device)
    # if a new stage, then it picks up previous saved model
    if args.new_stage:
        A3C_shared_model.load_state_dict(
            torch.load('{}/a3c_super_mario_bros_{}_{}_enc2'.format(
                args.trained_models_path, args.world, args.stage)))
        A3C_shared_model.eval()
    # GPU check
    if (args.use_cuda and torch.cuda.is_available()):
        A3C_shared_model.cuda()
        CAE_shared_model.cuda()
    # shares memory with worker instances
    CAE_shared_model.share_memory()

    A3C_shared_model.share_memory()

    print('A3C')
    print(A3C_shared_model)
    # initialize optimizers
    optimizer_cae = CAE_shared_model.createLossAndOptimizer(
        CAE_shared_model, 0.001)
    optimizer_a3c = SharedAdam(A3C_shared_model.parameters(), lr=args.lr)
    #optimizer.share_memory()

    # processes
    workers = []

    # start train process (run for the set number of workers)
    for rank in range(args.num_processes):
        worker = mp.Process(target=train_a3c,
                            args=(rank, args, optimizer_a3c,
                                  A3C_shared_model, CAE_shared_model,
                                  optimizer_cae, True))
        worker.start()
        workers.append(worker)

    # test worker
    worker = mp.Process(target=test_a3c,
                        args=(args.num_processes, args, A3C_shared_model,
                              CAE_shared_model))
    worker.start()
    workers.append(worker)

    # join all processes
    for worker in workers:
        worker.join()
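shared_learn and its workers read a number of attributes off args: paths, the world/stage selection, and the hyper-parameters used by train_a3c and test_a3c above. A minimal argparse sketch with hypothetical flags and defaults, covering only the attributes visible in this code:

import argparse

def get_args():
    # Hypothetical flags/defaults inferred from the attributes used above.
    parser = argparse.ArgumentParser("A3C + CAE Super Mario Bros training")
    parser.add_argument("--world", type=int, default=1)
    parser.add_argument("--stage", type=int, default=1)
    parser.add_argument("--lr", type=float, default=1e-4)
    parser.add_argument("--num_processes", type=int, default=6)
    parser.add_argument("--use_cuda", action="store_true")
    parser.add_argument("--new_stage", action="store_true")
    parser.add_argument("--max_actions", type=int, default=200)
    parser.add_argument("--max_steps", type=int, default=5000)
    parser.add_argument("--sum_path", type=str, default="tensorboard/a3c_cae")
    parser.add_argument("--trained_models_path", type=str, default="trained_models")
    parser.add_argument("--pretrained_model_weights_path", type=str,
                        default="trained_models/cae_weights")
    return parser.parse_args()

if __name__ == "__main__":
    shared_learn(get_args())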