Example #1
0
        help="If specified, sets the recording dir, default=Disabled")
    # NOTE(review): fragment — the opening `parser.add_argument(` line for the
    # argument documented above is outside this view, as is the code that
    # follows the truncated `while True:` loop at the bottom.
    parser.add_argument("-s",
                        "--save",
                        type=int,
                        help="If specified, save every N-th step as an image")
    parser.add_argument("--acktr",
                        default=False,
                        action='store_true',
                        help="Enable Acktr-specific tweaks")
    args = parser.parse_args()
    # Proxy for Gazebo's link-state query service (presumably used to read the
    # simulated robot's pose later in the script — TODO confirm off-view).
    get_link_state = rospy.ServiceProxy("/gazebo/get_link_state", GetLinkState)
    pitch = 0  # NOTE(review): not read in the visible span; likely updated by the IMU callback — confirm
    # Subscribe to the robot's IMU topic; get_angular_vel is defined elsewhere
    # in this file (off-view).
    rospy.Subscriber('/Bobby/imu', Imu, get_angular_vel)

    counter = 0  # NOTE(review): not used in the visible span — presumably a step/frame counter
    env = make_env(args)
    # Optionally wrap the env with a Gym Monitor to record episodes to
    # args.record (the directory set by the truncated argument above).
    if args.record:
        env = wrappers.Monitor(env, args.record)

    # Actor network sized from the env's flat observation/action spaces;
    # hidden size comes from a --hid CLI option parsed off-view.
    net = model.ModelActor(env.observation_space.shape[0],
                           env.action_space.shape[0], args.hid)
    if args.acktr:
        # NOTE(review): `opt` is never used in the visible span. Constructing
        # KFACOptimizer may be needed for its side effects on the net (hook
        # registration) before loading ACKTR-trained weights — confirm.
        opt = kfac.KFACOptimizer(net)
    # Restore trained weights from the checkpoint path given on the CLI.
    net.load_state_dict(torch.load(args.model))

    obs = env.reset()
    total_reward = 0.0
    total_steps = 0

    # Playback loop: convert the observation for the policy net.
    # (Loop body is truncated here — the remainder is outside this view.)
    while True:
        obs_v = torch.FloatTensor(obs)
Example #2
0
# --- A2C training hyperparameters ---
GAMMA = 0.99                 # discount factor, passed to ExperienceSourceFirstLast below
REWARD_STEPS = 5             # n-step return horizon (steps_count for the experience source)
BATCH_SIZE = 32              # training batch size (consumed by the loop outside this view)
LEARNING_RATE_ACTOR = 1e-5   # Adam LR for the actor network (opt_act)
LEARNING_RATE_CRITIC = 1e-3  # Adam LR for the critic network (opt_crt)
ENTROPY_BETA = 1e-3          # entropy-bonus weight (presumably used in the loss off-view — confirm)
ENVS_COUNT = 16              # number of parallel environments created below

# Script entry point: set up envs, nets, experience source and optimizers for
# A2C training. NOTE(review): fragment — the training loop that consumes
# `batch`, `best_reward` and `tstart` lies outside this view.
if __name__ == "__main__":

    parser = make_parser()

    # parse_args is a project helper (defined off-view); "a2c" presumably
    # selects run naming / save-path conventions for this algorithm.
    args, device, save_path, test_env, maxeps, maxsec = parse_args(
        parser, "a2c")

    # Pool of parallel environments for the experience source.
    envs = [make_env(args) for _ in range(ENVS_COUNT)]

    # Actor and critic networks, shaped from the first env's spaces.
    net_act, net_crt = make_nets(args, envs[0], device)

    writer = SummaryWriter(comment="-a2c_" + args.name)
    agent = model.AgentA2C(net_act, device=device)
    # n-step (REWARD_STEPS) discounted experience source over all envs.
    exp_source = ptan.experience.ExperienceSourceFirstLast(
        envs, agent, GAMMA, steps_count=REWARD_STEPS)

    # Separate optimizers so actor and critic can use different LRs.
    opt_act = optim.Adam(net_act.parameters(), lr=LEARNING_RATE_ACTOR)
    opt_crt = optim.Adam(net_crt.parameters(), lr=LEARNING_RATE_CRITIC)

    batch = []           # accumulator for transitions (filled by the off-view loop)
    best_reward = None   # best test reward seen so far; None until first evaluation
    tstart = time.time() # wall-clock start, presumably for the maxsec time limit — confirm