return ret


if __name__ == '__main__':
    # Demonstration script: restore the saved target network and replay
    # greedy episodes of MountainCar-v0 until the process is interrupted.

    # Environment setup
    env = gym.make('MountainCar-v0')

    # Directory holding saved checkpoints and the base name of the state file
    models_dir = 'models'
    state_file = 'state'

    # Restore the target network. find_prev_state_files returns two paths;
    # only the weights file is needed here, the second one is ignored.
    Q_targ = QNet(3)
    weights_file_path, _ = utils.find_prev_state_files(models_dir, state_file)
    weights = torch.load(weights_file_path)
    Q_targ.load_state_dict(weights['Q_targ'])

    # Run episodes forever; the finally clause makes sure the environment
    # (and its render window) is released when the loop is interrupted.
    try:
        while True:
            env.reset()
            env.render()

            # The first transition uses the fixed action 1; the observation
            # it returns seeds the greedy loop below.
            raw_state, reward, done, info = env.step(1)

            while not done:
                state = prepare_state(raw_state)

                # Pure inference: no autograd graph is needed.
                with torch.no_grad():
                    q_values = Q_targ(FloatTensor(state))

                # Greedy policy: pick the action with the highest Q-value.
                action = np.argmax(q_values.to('cpu').detach().numpy())

                raw_state, reward, done, info = env.step(action)

                # Redraw after every step so the episode is actually visible;
                # rendering only once after reset() shows a single frame.
                env.render()
    finally:
        env.close()
Exemple #2
0
    out_dir = os.path.join(models_dir, str(int(time.time())))

    #Engine parameters
    env = gym.make('MountainCar-v0')

    num_actions = 3

    #Initializing neural networks and loading previous state, if it exists
    Q = QNet(num_actions=num_actions)
    Q_targ = QNet(num_actions=num_actions)

    prev_state = None
    if os.listdir(models_dir) == []:
        Q.apply(utils.init_weights)

        Q_targ.load_state_dict(Q.state_dict())
    else:
        weights_file_path, state_file_path = utils.find_prev_state_files(
            models_dir, state_file)

        weights = torch.load(weights_file_path)
        prev_state = torch.load(state_file_path)

        Q.load_state_dict(weights['Q'])
        Q_targ.load_state_dict(weights['Q_targ'])

    #Learn params
    gamma = 0.99

    #Hyperparams
    frame_count = 40000