help="If specified, sets the recording dir, default=Disabled") parser.add_argument("-s", "--save", type=int, help="If specified, save every N-th step as an image") parser.add_argument("--acktr", default=False, action='store_true', help="Enable Acktr-specific tweaks") args = parser.parse_args() get_link_state = rospy.ServiceProxy("/gazebo/get_link_state", GetLinkState) pitch = 0 rospy.Subscriber('/Bobby/imu', Imu, get_angular_vel) counter = 0 env = make_env(args) if args.record: env = wrappers.Monitor(env, args.record) net = model.ModelActor(env.observation_space.shape[0], env.action_space.shape[0], args.hid) if args.acktr: opt = kfac.KFACOptimizer(net) net.load_state_dict(torch.load(args.model)) obs = env.reset() total_reward = 0.0 total_steps = 0 while True: obs_v = torch.FloatTensor(obs)
# Hyperparameters for the A2C training script
GAMMA = 0.99
REWARD_STEPS = 5            # n-step return horizon used by the experience source
BATCH_SIZE = 32
LEARNING_RATE_ACTOR = 1e-5
LEARNING_RATE_CRITIC = 1e-3
ENTROPY_BETA = 1e-3
ENVS_COUNT = 16


if __name__ == "__main__":
    parser = make_parser()
    args, device, save_path, test_env, maxeps, maxsec = parse_args(parser, "a2c")

    # A pool of environments lets the experience source gather decorrelated samples
    envs = [make_env(args) for _ in range(ENVS_COUNT)]
    net_act, net_crt = make_nets(args, envs[0], device)

    writer = SummaryWriter(comment="-a2c_" + args.name)
    agent = model.AgentA2C(net_act, device=device)
    exp_source = ptan.experience.ExperienceSourceFirstLast(
        envs, agent, GAMMA, steps_count=REWARD_STEPS)

    # Separate optimizers (and learning rates) for the actor and the critic
    opt_act = optim.Adam(net_act.parameters(), lr=LEARNING_RATE_ACTOR)
    opt_crt = optim.Adam(net_crt.parameters(), lr=LEARNING_RATE_CRITIC)

    batch = []
    best_reward = None
    tstart = time.time()
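    # The training loop itself is not shown above; what follows is a condensed sketch
    # of a standard A2C update built on the objects created so far, not the original
    # code. It assumes `math`, `torch`, and `torch.nn.functional as F` are imported,
    # that the Gaussian actor exposes a learnable log-std as `net_act.logstd`, and
    # that a helper `unpack_batch_a2c()` (hypothetical here) turns a list of n-step
    # ExperienceFirstLast transitions into state/action tensors plus bootstrapped
    # value targets. Periodic testing, checkpointing, and the maxeps/maxsec stopping
    # conditions are omitted.
    for step_idx, exp in enumerate(exp_source):
        batch.append(exp)
        if len(batch) < BATCH_SIZE:
            continue

        states_v, actions_v, vals_ref_v = unpack_batch_a2c(
            batch, net_crt, last_val_gamma=GAMMA ** REWARD_STEPS, device=device)
        batch.clear()

        # Critic update: regress V(s) towards the bootstrapped n-step return
        opt_crt.zero_grad()
        value_v = net_crt(states_v)
        loss_value_v = F.mse_loss(value_v.squeeze(-1), vals_ref_v)
        loss_value_v.backward()
        opt_crt.step()

        # Actor update: policy gradient weighted by the advantage, plus an entropy
        # bonus scaled by ENTROPY_BETA to keep exploration from collapsing
        opt_act.zero_grad()
        mu_v = net_act(states_v)
        adv_v = vals_ref_v.unsqueeze(-1) - value_v.detach()
        logstd = net_act.logstd
        log_prob_v = (-((actions_v - mu_v) ** 2) / (2 * torch.exp(2 * logstd))
                      - logstd - 0.5 * math.log(2 * math.pi))
        loss_policy_v = -(adv_v * log_prob_v).mean()
        entropy_loss_v = ENTROPY_BETA * (
            -(torch.log(2 * math.pi * torch.exp(2 * logstd)) + 1) / 2).mean()
        loss_v = loss_policy_v + entropy_loss_v
        loss_v.backward()
        opt_act.step()

        writer.add_scalar("loss_value", loss_value_v.item(), step_idx)
        writer.add_scalar("loss_policy", loss_policy_v.item(), step_idx)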