def main():
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = {k: parse(v) for k, v in parse_unknown_args(unknown_args).items()}

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, _ = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        env = build_env(args)
        obs = env.reset()
        while True:
            actions = model.step(obs)[0]
            obs, _, done, _ = env.step(actions)
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                obs = env.reset()
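# A usage sketch for this entry point (flags follow the standard baselines
# command-line interface; the paths are hypothetical):
#
#   python -m baselines.run --alg=ppo2 --env=PongNoFrameskip-v4 \
#       --num_timesteps=1e6 --save_path=~/models/pong_ppo2 --play
#
# Any extra --key=value flags not recognized by common_arg_parser are collected
# into extra_args and forwarded to the algorithm's learn function.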
def main():
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = {k: parse(v) for k, v in parse_unknown_args(unknown_args).items()}

    # rebuild the model from a pickled model-construction function,
    # then load the saved parameters (checkpoints can be loaded the same way)
    pickle_in = open("./tmp/make_model.pkl", "rb")
    make_model = pickle.load(pickle_in)
    model = make_model()
    model.load("./tmp/my_model")

    logger.log("Running trained model")
    env = build_env(args)
    obs = env.reset()
    while True:
        # index 0 of model.step's return value holds the actions; the remaining
        # entries vary by algorithm (see the ppo2 note below)
        actions = model.step(obs)[0]
        obs, _, done, _ = env.step(actions)
        # env.render()
        # run until every parallel env is done, rather than any single one
        done = done.all() if isinstance(done, np.ndarray) else done
        print("step")
        if done:
            break
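# For reference, a sketch of ppo2's step interface as implemented in baselines
# (worth verifying against the exact version in use):
#
#   actions, values, states, neglogpacs = model.step(obs)
#
# actions feed env.step; values are the critic's value estimates; states is
# None for non-recurrent policies; neglogpacs are the negative log
# probabilities of the sampled actions.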
def main():
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = {k: run.parse(v) for k, v in parse_unknown_args(unknown_args).items()}
    train(args, extra_args)
def parse_cmdline_kwargs(args):
    '''
    Convert a list of '='-separated command-line arguments to a dictionary,
    evaluating python objects when possible.
    '''
    def parse(v):
        assert isinstance(v, str)
        try:
            return eval(v)
        except (NameError, SyntaxError):
            return v

    return {k: parse(v) for k, v in parse_unknown_args(args).items()}
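# A usage sketch for parse_cmdline_kwargs, assuming baselines'
# parse_unknown_args turns ['--lr=0.001', '--value_network=copy'] into
# {'lr': '0.001', 'value_network': 'copy'}:
#
#   extra_args = parse_cmdline_kwargs(['--lr=0.001', '--value_network=copy'])
#   # eval('0.001') succeeds, so the value becomes the float 0.001;
#   # eval('copy') raises NameError, so the value stays the string 'copy'
#   assert extra_args == {'lr': 0.001, 'value_network': 'copy'}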
def main():
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = parse_cmdline_kwargs(unknown_args)
    unknown_a = parse_unknown_args(unknown_args)
    print('args')
    print(args)
    # the parser does not accept new arguments, so custom arguments are handled here
    print('extra_args')
    print(extra_args)
    # 'progress_dir' only configures the logger; remove it so it is not
    # forwarded to the learning algorithm
    if 'progress_dir' in extra_args:
        del extra_args['progress_dir']
        print('Deleted progress_dir. New extra_args:')
        print(extra_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        if 'progress_dir' in unknown_a:
            logger.configure(dir=unknown_a['progress_dir'])
        else:
            logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)
    env.close()

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        env = build_env(args)
        obs = env.reset()

        def initialize_placeholders(nlstm=128, **kwargs):
            # recurrent state has shape (num_envs, 2 * nlstm): cell and hidden parts
            return np.zeros((args.num_env or 1, 2 * nlstm)), np.zeros((1))

        state, dones = initialize_placeholders(**extra_args)
        while True:
            actions, _, state, _ = model.step(obs, S=state, M=dones)
            obs, _, done, _ = env.step(actions)
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                obs = env.reset()
        env.close()
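# A usage sketch for the progress_dir handling above (paths hypothetical):
#
#   python -m baselines.run --alg=ppo2 --env=CartPole-v0 --num_timesteps=1e5 \
#       --progress_dir=/tmp/my_logs
#
# progress_dir is routed to logger.configure(dir=...) and then deleted from
# extra_args so that it never reaches the learning algorithm.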
def parse_cmdline_kwargs(args):
    '''
    Convert a list of '='-separated command-line arguments to a dictionary,
    evaluating python objects when possible.
    '''
    def parse(v):
        assert isinstance(v, str)
        try:
            return eval(v)
        except (NameError, SyntaxError):
            return v

    return {k: parse(v) for k, v in parse_unknown_args(args).items()}
def main():
    # configure logger, disable logging in child MPI processes (with rank > 0)
    log_dir = "test2"  # renamed from 'dir' to avoid shadowing the builtin
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = {k: parse(v) for k, v in parse_unknown_args(unknown_args).items()}

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure(log_dir)
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    # hard-coded overrides for this experiment
    args.play = True
    args.num_env = 3

    print("")
    print("RUNNING CORRECTLY")
    model, _ = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        # initialize the act model
        logger.log("Running trained model")
        env = build_env(args)
        obs = env.reset()
        s = model.initial_state  # zeroed recurrent state
        m = [False for _ in range(1)]  # "done" mask for a single env
        print("obs is " + str(obs))
        step = 0
        done = False
        while not done:
            step += 1
            print(" ")
            print(" STEP " + str(step))
            out = model.step(obs, S=s, M=m)
            actions = out[0]
            s = out[2]
            m = out[3]
            print("actions " + str(actions))
            print(type(actions[0]))
            obs, _, done, _ = env.step(actions)
            print(obs)
            print(done)
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                obs = env.reset()
def main():
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = parse_cmdline_kwargs(unknown_args)
    unknown_a = parse_unknown_args(unknown_args)
    print('args')
    print(args)
    # the parser does not accept new arguments, so custom arguments are handled here
    print('extra_args')
    print(extra_args)
    if 'progress_dir' in extra_args:
        del extra_args['progress_dir']
        print('Deleted progress_dir. New extra_args:')
        print(extra_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        if 'progress_dir' in unknown_a:
            logger.configure(dir=unknown_a['progress_dir'])
        else:
            logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        # reuse the training env rather than rebuilding it with build_env(args)
        obs = env.reset()

        def initialize_placeholders(nlstm=128, **kwargs):
            return np.zeros((args.num_env or 1, 2 * nlstm)), np.zeros((1))

        state, dones = initialize_placeholders(**extra_args)
        with tf.Session() as sess:
            print(model.act_model)
            while True:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
                obs, _, done, _ = env.step(actions)
                print('Observation: ' + str(obs[0][3]) + ' Actions: ' + str(actions))
                env.render()
                done = done.any() if isinstance(done, np.ndarray) else done
                if done:
                    obs = env.reset()
        env.close()
def main():
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    arg_parser.add_argument('--options_play', help='Agent play with options',
                            default=False, action='store_true')
    arg_parser.add_argument('--selective_option_play', default=False,
                            action='store_true',
                            help='Agent play with selective option')
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = {k: parse(v) for k, v in parse_unknown_args(unknown_args).items()}

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, _ = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        env = build_env(args)
        obs = env.reset()
        w, h, _ = obs[0].shape
        # record the rollout; frames are flipped from RGB to BGR for OpenCV
        video = MovieWriter(osp.join(logger.get_dir(), "play.mp4"), (w, h), 2)
        video.add_frame(np.array(obs[0][:, :, ::-1], dtype=np.uint8))
        import cv2
        cv2.imwrite(os.path.join(logger.get_dir(), "init_ob.png"), obs[0][:, :, ::-1])
        i = 0
        while i < 1000:
            i += 1
            actions = model.step(obs, stochastic=True)[0]
            obs, _, done, _ = env.step(actions)
            video.add_frame(np.array(obs[0][:, :, ::-1], dtype=np.uint8))
            done = done[0]
            if done:
                obs = env.reset()
                env.close()
                video.close()
                break

    if args.selective_option_play:
        logger.log("Running selective option play with trained options policy")
        env = build_env(args)
        obs = env.reset()
        w, h, _ = obs[0].shape
        video = MovieWriter(osp.join(logger.get_dir(), "selective_option_play.mp4"),
                            (w, h), 2)
        video.add_frame(np.array(obs[0][:, :, ::-1], dtype=np.uint8))
        import cv2
        cv2.imwrite(os.path.join(logger.get_dir(), "init_ob.png"), obs[0][:, :, ::-1])
        i = 0
        while i < 1000:
            i += 1
            actions = model.selective_option_step(obs, stochastic=True)[0]
            obs, _, done, _ = env.step(actions)
            video.add_frame(np.array(obs[0][:, :, ::-1], dtype=np.uint8))
            done = done[0]
            if done:
                obs = env.reset()
                env.close()
                video.close()
                break

    if args.options_play:
        logger.log("Running trained options policy")
        video_path = osp.join(logger.get_dir(), "options_play")
        if not osp.exists(video_path):
            os.mkdir(video_path)
        # assume 64 options: play each one with a one-hot option vector
        for i in range(64):
            env = build_env(args)
            obs = env.reset()
            w, h, _ = obs[0].shape
            logger.log("Create op_play_{}.mp4".format(i))
            video = MovieWriter(osp.join(video_path, "op_play_{}.mp4".format(i)),
                                (w, h), 2)
            video.add_frame(np.array(obs[0][:, :, ::-1], dtype=np.uint8))
            option_z = np.zeros((env.num_envs, 64))
            option_z[:, i] = 1.0
            step = 1
            while step < 1000:
                actions = model.option_step(option_z, obs, stochastic=True)[0]
                discri = model.option_select(obs)[0]
                logger.log("step: {} discriminator: {}".format(step, discri))
                obs, _, done, _ = env.step(actions)
                video.add_frame(np.array(obs[0][:, :, ::-1], dtype=np.uint8))
                done = done[0]
                step += 1
                if done:
                    obs = env.reset()
                    env.close()
                    video.close()
                    break
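# MovieWriter is not defined in this snippet. A minimal sketch of a compatible
# implementation on top of OpenCV's VideoWriter (the path/(w, h)/fps constructor
# signature is an assumption inferred from the calls above):
import cv2

class MovieWriter(object):
    def __init__(self, path, size, fps):
        # cv2.VideoWriter expects the frame size as (width, height)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        self.writer = cv2.VideoWriter(path, fourcc, fps, size)

    def add_frame(self, frame):
        # frame: HxWx3 uint8 image in BGR channel order
        self.writer.write(frame)

    def close(self):
        self.writer.release()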
def main(args=None):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    if args is None:
        from thesis_galljamov18.python.training.guro_train import LOAD_MODEL
        arg_parser = common_arg_parser()
        args, unknown_args = arg_parser.parse_known_args()
        extra_args = {k: parse(v) for k, v in parse_unknown_args(unknown_args).items()}
        """All args:
        {'nsteps': 2048, 'nminibatches': 32, 'lam': 0.95, 'gamma': 0.99,
         'noptepochs': 10, 'log_interval': 1, 'ent_coef': 0.0,
         'lr': <function mujoco.<locals>.<lambda> at 0x7f8f5af49f28>,
         'cliprange': 0.2, 'value_network': 'copy'}"""

        # train my environment instead of the default one
        args.env = "Guro-v0"
        args.num_timesteps = 0 if LOAD_MODEL else 10e6 + 1e5
        args.play = LOAD_MODEL
        args.alg = 'ppo2'
        args.network = 'mlp'

        # change further arguments
        # nsteps = 2048
        # nminibatches = 32
        # gamma = 0.95
        # lr = 0.001
        # cliprange = 0.2
        # extra_args.update({'nsteps': nsteps, 'nminibatches': nminibatches,
        #                    'gamma': gamma, 'cliprange': cliprange})
        # extra_args.update({'lr': 1e-10})
    else:
        extra_args = {}

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, _ = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("\n------------\nRunning trained model\n------------\n")

        def say(text):
            os.system('spd-say "{}" --volume -1 --voice-type male2'.format(text))

        # say("Attention please! Running trained model in 10 seconds!")
        # import time
        # time.sleep(10)

        env = build_env(args)
        obs = env.reset()
        # env.ob_rms.mean = [0, 0, 0, 0, 0, 0]
        # print("\n----------\nOBSERV_MEANS of loaded model: " + str(env.ob_rms.mean) + "\n----------\n")
        # exit(33)
        while True:
            actions = model.step(obs)[0]
            obs, _, done, _ = env.step(actions)
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                obs = env.reset()
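# For context on env.ob_rms above: it comes from baselines' VecNormalize
# wrapper, which maintains a running mean/variance of observations. A sketch of
# the normalization it applies (epsilon and clipob are the wrapper's
# parameters; defaults may differ across versions):
#
#   norm_obs = np.clip((obs - env.ob_rms.mean) / np.sqrt(env.ob_rms.var + epsilon),
#                      -clipob, clipob)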