def main(args):
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)
    print(args)
    # args.env = "MountainCarContinuous-v0"
    train_copos(args)
def main():
    logger.configure('E:\\Project\\Toyota RL\\Toyata 2018\\Toyata RL 4th quarter\\log')  # 'F:\\GuanYang\\toyota2018_4\\log'
    parser = common_arg_parser()
    parser.add_argument('--load_model_path', default=None)
    parser.set_defaults(num_timesteps=int(2e7))
    args = parser.parse_args()

    env = environment.Env(N=6, pattern=[0, 2, 4, 8, 9, 10], height=30, width=30)

    if not args.play:
        # train the model
        train(env=env, num_timesteps=args.num_timesteps,
              load_model_path=args.load_model_path)
    else:
        # construct the model object, load pre-trained model and render
        pi = train(env=env, num_timesteps=1)
        U.load_state(args.load_model_path)
        ob = env.manualSet(modelList=env.pattern)
        while True:
            action = pi.act(stochastic=False, ob=ob)[0]
            # ob, _, done, _ = env.step(action)
            ob, rew, done, _ = env.updateEnv(action)
            env.showEnv()
            if done:
                ob = env.manualSet(modelList=env.pattern)
def main():
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = {k: parse(v) for k, v in parse_unknown_args(unknown_args).items()}

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, _ = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        env = build_env(args)
        obs = env.reset()
        while True:
            actions = model.step(obs)[0]
            obs, _, done, _ = env.step(actions)
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                obs = env.reset()
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if args.log_path is None:
        date_str = '{}'.format(datetime.datetime.today().strftime('%Y-%m-%d_%H-%M-%S'))
        folder_name = args.alg
        if 'iterative' in extra_args:
            folder_name += '-iterative'
        if args.name is not None:
            folder_name += '-' + args.name
        args.log_path = osp.abspath(osp.join('./logs', folder_name, args.env, date_str))
        args.save_path = args.log_path

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    # save opts
    with open(osp.join(args.log_path, 'args.json'), 'w') as fp:
        json.dump(vars(args), fp, indent=1)

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))
        episode_rew = 0
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)
            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0
                obs = env.reset()

    env.close()
    return model
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if args.log_path is not None:
        # ========= modify the log path with time =============
        time = datetime.datetime.now().strftime('%y_%a_%b_%d_%H:%M:%S:%f')
        args.log_path = os.path.join(args.log_path, time)
        # =====================================================

    if args.save_path is not None:
        # ========= modify the save path with time ============
        time = datetime.datetime.now().strftime('%y_%a_%b_%d_%H:%M:%S:%f')
        args.save_path = os.path.join(args.save_path, time)
        # =====================================================

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        # ========= modify the save path with time ============
        # save_path_custom = os.path.join(save_path, time)
        # =====================================================
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        # from ipdb import set_trace; set_trace()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))
        episode_rew = 0
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)
            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0
                obs = env.reset()

    env.close()
    return model
def main(args):
    start_time = time.time()
    # TODO: restore model and resume training
    # loading the model is simple, but restoring the logging will be more difficult

    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if "load_path" in extra_args:
        if extra_args["load_path"] is True:
            extra_args["load_path"] = get_loading_path(args, extra_args)

    # ./logs / env + alg / experiments_name
    # [no_staliro, fixed_staliro, randomized_staliro, weighted_queue, variable_start, success_counter]
    os.environ["OPENAI_LOGDIR"] = get_logging_path(args, extra_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        model.save(osp.join(logger.get_dir(), args.save_path, 'model.pkl'))

    print("Elapsed time {}".format(time.time() - start_time))
    return model
def main():
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = parse_cmdline_kwargs(unknown_args)

    env = build_testenv(args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)
    env.close()

    if args.play:
        logger.log("Running trained model")
        env = build_testenv(args)
        obs = env.reset()
        for i in range(966):
            actions, _, _, _ = model.step(obs)
            obs, _, done, _ = env.step(actions)
        env.close()
def main():
    parser = common_arg_parser()

    ######################################################################
    # MY CUSTOM ARGS
    parser.add_argument('--save-interval', type=int, default=100,
                        help="Interval between saves and stuff")
    parser.add_argument('--output-prefix', required=True,
                        help="File prefix of parameter saves")
    # TODO Disabled for now!!! CPU thing isn't critical though
    # parser.add_argument('--num-cpus', type=int, default=1,
    #                     help="Number of CPU cores to use? Idk...")
    # parser.add_argument('--hidden-dims', type=str, default="64,64",
    #                     help="Within quotes, sizes of each hidden layer "
    #                     + "separated by commas [also, no whitespace]")
    # END CUSTOM ARGS
    ######################################################################

    args = parser.parse_args()
    logger.configure()
    train(num_timesteps=args.num_timesteps,
          seed=args.seed,
          save_interval=args.save_interval,
          output_prefix=args.output_prefix)
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)
    env.close()

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        env = build_env(args)
        obs = env.reset()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))
        cum_flowtime_epi = np.zeros(1000)
        for i_episode in range(1000):
            while True:
                print('--------------------')
                if state is not None:
                    actions, _, state, _ = model.step(obs, S=state, M=dones)
                else:
                    actions, _, _, _ = model.step(obs)
                obs, _, done, _ = env.step(actions)
                print('Actions: {}'.format(actions))
                # env.render()
                done = done.any() if isinstance(done, np.ndarray) else done
                if done:
                    print('Done. Rendering......')
                    cum_flowtime_epi[i_episode] = env.render()
                    if i_episode == 999:
                        print(cum_flowtime_epi[i_episode])
                    break
        env.close()
        # write to file
        # np.savetxt('data_normal.txt', cum_flowtime_epi)  # used to plot a2c on single link/path under synthetic data
        np.savetxt('data_normal_no_training.txt', cum_flowtime_epi)  # used to plot a2c without pre-training on single link/path under synthetic data

    return model
def main():
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = {k: parse(v) for k, v in parse_unknown_args(unknown_args).items()}

    pickle_in = open("./tmp/make_model.pkl", "rb")
    # pickle_in = open("./tmp/my_model", "rb")
    make_model = pickle.load(pickle_in)
    model = make_model()
    model.load("./tmp/my_model")  # can use checkpoints

    logger.log("Running trained model")
    env = build_env(args)
    obs = env.reset()
    # print(obs)
    while True:
        actions = model.step(obs)[0]  # 0th element is the actions; step returns a few more arrays (need to check for ppo)
        obs, _, done, _ = env.step(actions)
        # env.render()
        # done = done.any() if isinstance(done, np.ndarray) else done
        done = done.all() if isinstance(done, np.ndarray) else done
        print("step")
        if done:
            break
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))
        episode_rew = np.zeros(env.num_envs) if isinstance(env, VecEnv) else np.zeros(1)
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)

            image = env.get_image()
            if image:
                # print(image[0])
                img2 = np.array(image[0])
                angle = getRect(image[0])
                env.set_rotation(angle)
                # cv2.imshow('frame', image[0])
                # cv2.waitKey(0)

            obs, rew, done, _ = env.step(actions)
            episode_rew += rew
            env.render()
            done_any = done.any() if isinstance(done, np.ndarray) else done
            if done_any:
                for i in np.nonzero(done)[0]:
                    print('episode_rew={}'.format(episode_rew[i]))
                    episode_rew[i] = 0

    env.close()
    return model
def run():
    # configure logger, disable logging in child MPI processes (with rank > 0)
    # print('enter main function')
    args = [
        'run.py', '--alg=ppo2', '--env=RacecarBulletEnv-v0',
        '--num_timesteps=0',
        '--load_path=/Users/huangyixuan/models/racecar_ppo2', '--play'
    ]
    print(args)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    print('unknown_args')
    print(unknown_args)
    extra_args = parse_cmdline_kwargs(unknown_args)
    print('extra')
    print(extra_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        # configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    model, env = train(args, extra_args)
    return model
def main():
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = parse_cmdline_kwargs(unknown_args)

    args.num_timesteps = 0
    args.play = True
    args.env = 'YamaXRealForwardWalk-v0'

    model, env = train(args, extra_args)
    env.close()

    env = build_env(args)
    obs = env.reset()

    def initialize_placeholders(nlstm=128, **kwargs):
        return np.zeros((args.num_env or 1, 2 * nlstm)), np.zeros((1,))

    state, dones = initialize_placeholders(**extra_args)
    while True:
        actions, _, state, _ = model.step(obs, S=state, M=dones)
        obs, _, done, _ = env.step(actions)
        env.render()
        done = done.any() if isinstance(done, np.ndarray) else done
        if done:
            obs = env.reset()

    env.close()
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        # import time
        rank = 0
        # if args.log_path:
        #     args.log_path = osp.join(args.log_path, time.strftime("%Y-%m-%d-%H-%M-%S"))
        args.log_path = configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    if args.play:
        args.num_timesteps = 0
        args.num_env = 1

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))
        episode_rew = np.zeros(env.num_envs) if isinstance(env, VecEnv) else np.zeros(1)
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)
            if isinstance(actions, list):
                actions = actions[0]

            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            if args.vis_sleep > 0:
                import time
                time.sleep(args.vis_sleep)
            # print(f"gc:{obs['observation'][...,3:]}")
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0

    env.close()
    return model
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    time_now = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    log_path = './results/{alg}/experiment-{time}'.format(alg=args.alg, time=time_now)
    os.makedirs(log_path)
    args.log_path = log_path
    with open(args.log_path + '/config.json', 'w', encoding='utf-8') as f:
        json.dump(vars(args), f, ensure_ascii=False, indent=4)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        ckpt = tf.train.Checkpoint(model=model)
        manager = tf.train.CheckpointManager(ckpt, save_path, max_to_keep=None)
        manager.save()

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        if not isinstance(env, VecEnv):
            obs = np.expand_dims(np.array(obs), axis=0)
        state = model.initial_state if hasattr(model, 'initial_state') else None
        episode_rew = np.zeros(env.num_envs) if isinstance(env, VecEnv) else np.zeros(1)
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs)
            else:
                actions, _, _, _ = model.step(obs)

            obs, rew, done, _ = env.step(actions.numpy())
            if not isinstance(env, VecEnv):
                obs = np.expand_dims(np.array(obs), axis=0)
            episode_rew += rew
            env.render()
            done_any = done.any() if isinstance(done, np.ndarray) else done
            if done_any:
                for i in np.nonzero(done)[0]:
                    print('episode_rew={}'.format(episode_rew[i]))
                    episode_rew[i] = 0

    env.close()
    return model
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if args.extra_import is not None:
        import_module(args.extra_import)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    # If the arguments indicate training is to be done:
    model, env = train(args, extra_args)
    env.close()

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)
        saver = tf.train.Saver()
        # logger.info("saving the trained model")
        # start_time_save = time.time()
        # saver.save(sess, save_path + "ddpg_test_model")
        # logger.info('runtime saving: {}s'.format(time.time() - start_time_save))

    # If it is a test run on the learned model
    if args.play:
        logger.log("Running trained model")
        env = build_env(args)
        obs = env.reset()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)
            obs, _, done, _ = env.step(actions)
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                obs = env.reset()
        env.close()

    return model
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)
    # import_module(args.custom_env_module)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    if args.play:
        args.num_timesteps = 0
        args.num_env = 1

    model, env = train(args, extra_args)

    if args.play:
        logger.log("Running trained model")
        checkdir = osp.join(logger.get_dir(), 'checkpoints')
        paths = os.listdir(checkdir)
        print(f"loading model: {paths[-1]}")
        model.load(osp.join(checkdir, paths[-1]))
        obs = env.reset()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))
        episode_rew = np.zeros(env.num_envs) if isinstance(env, VecEnv) else np.zeros(1)
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs[0], S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs[0])
            # Set interact side to always be 1
            placeholder_action = np.zeros_like(actions)
            actions = np.concatenate([actions, placeholder_action], axis=0)
            obs, rew, done, _ = env.step(actions, play=True)
            episode_rew += rew
            # env.render()
            done_any = done.any() if isinstance(done, np.ndarray) else done
            if done_any:
                for i in np.nonzero(done)[0]:
                    print('episode_rew={}'.format(episode_rew[i]))
                    episode_rew[i] = 0
    else:
        if args.save_path is not None and rank == 0:
            save_path = osp.expanduser(args.save_path)
            model.save(save_path)

    env.close()
    return model
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)
    # print("\n \n \n \n \n HI1 \n \n \n \n \n")

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)

    # print("\n \n \n \n \n HI2 \n \n \n \n \n")
    model, env = train(args, extra_args)
    # print("\n \n \n \n \n HI3 \n \n \n \n \n")

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)
    # print("\n \n \n \n \n HI4 \n \n \n \n \n")

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))
        episode_rew = 0
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)
            # print("\n \n \n \n \n HI1 \n \n \n \n \n")
            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            time.sleep(3)
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0
                obs = env.reset()

    env.close()
    return model
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    arg_parser = common.arguments.get_parser(arg_parser)
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    configs = common.config.get_config(args.env, args.experiment_name)
    args.save_path = os.path.join(configs.trained_directory, 'model.ckpt')

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
    else:
        rank = MPI.COMM_WORLD.Get_rank()

    # setup my logger and baselines' logger
    logger = common.config.setup_logger(args.verbose, args.model_name, configs.log_directory)

    # setup wandb
    logger_formats = ['stdout', 'log', 'csv']
    if args.use_wandb:
        logger_formats.append('wandb')
    baselines.logger.configure(configs.model_path, logger_formats, **vars(args))

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))
        episode_rew = 0
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)
            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0
                obs = env.reset()

    env.close()
    return model
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))
        episode_rew = np.zeros(env.num_envs) if isinstance(env, VecEnv) else np.zeros(1)
        goal_ind = 0
        num_goals = model.num_goals
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, goal_ind, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs, goal_ind)

            obs, rew, done, _ = env.step(actions)
            if rew != -1 and goal_ind < num_goals - 1:
                goal_ind += 1
            episode_rew += rew
            env.render()
            done_any = done.any() if isinstance(done, np.ndarray) else done
            if done_any:
                for i in np.nonzero(done)[0]:
                    print('episode_rew={}'.format(episode_rew[i]))
                    episode_rew[i] = 0
                goal_ind = 0

    env.close()
    return model
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    np.set_printoptions(precision=3)
    arg_parser = common_arg_parser()
    arg_parser.add_argument('--id', help='name of the experiment for saving', type=str, default=None)
    arg_parser.add_argument('--config', help='path to the algorithm config', type=str, default=None)
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if args.id is None:
        print('Please, specify the name of the experiment in --id')
        exit(0)
    if args.config is None:
        print('Please, specify the path to the algorithm config via --config')
        exit(0)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    train(args, extra_args)
    return

    # The code below is unreachable due to the early return above.
    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        env = DotaEnvironment()
        obs = env.reset()

        def initialize_placeholders(nlstm=128, **kwargs):
            return np.zeros((args.num_env or 1, 2 * nlstm)), np.zeros((1,))

        state, dones = initialize_placeholders(**extra_args)
        while True:
            actions, _, state, _ = model.step(obs, S=state, M=dones)
            obs, _, done, _ = env.step(actions)
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                obs = env.reset()
        env.close()
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))

        # query the model's action for each discrete observation
        pi = []
        for obs in range(1, env.observation_space.n):
            actions, _, _, _ = model.step([obs])
            pi.append(actions[0])
        print(pi)

        obs = env.reset()
        episode_rew, cnt, sum_reward = 0, 0, 0
        while cnt < 100:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)
            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                sum_reward += episode_rew
                episode_rew = 0
                cnt += 1
                # print('episode_rew={}'.format(episode_rew))
                obs = env.reset()
        print('mean_reward={}'.format(sum_reward / cnt))

    env.close()
    return model
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    # record the parsed arguments and the current git commit alongside the logs
    with open(os.path.join(logger.get_dir(), 'args.json'), 'w') as arg_file:
        args_copy = vars(args).copy()  # start with args' keys and values
        args_copy.update(extra_args)
        import subprocess
        args_copy['git_commit'] = subprocess.check_output(
            ["git", "describe", "--always"]).strip().decode("utf-8")
        json.dump(args_copy, arg_file)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))
        episode_rew = 0
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)
            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0
                obs = env.reset()

    env.close()
    return model
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    observations_data = []
    actions_data = []
    episode_rewards = []

    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    model, env = train(args, extra_args)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        print(env.observation_space)
        print(env.action_space)
        if not isinstance(env, VecEnv):
            obs = np.expand_dims(np.array(obs), axis=0)
        state = model.initial_state if hasattr(model, 'initial_state') else None
        episode_rew = np.zeros(env.num_envs) if isinstance(env, VecEnv) else np.zeros(1)
        for n in range(10000):
            if state is not None:
                actions, _, state, _ = model.step(obs)
            else:
                actions, _, _, _ = model.step(obs)

            obs, rew, done, _ = env.step(actions.numpy())
            observations_data.append(obs)
            actions_data.append(actions.numpy())
            if not isinstance(env, VecEnv):
                obs = np.expand_dims(np.array(obs), axis=0)
            episode_rew += rew
            env.render()
            done_any = done.any() if isinstance(done, np.ndarray) else done
            if done_any:
                for i in np.nonzero(done)[0]:
                    print('episode_rew={}'.format(episode_rew[i]))
                    episode_rewards.append(episode_rew[i])
                    episode_rew[i] = 0
            if n % 1000 == 0:
                print(n)
                print(np.mean(episode_rewards))

    env.close()
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    arg_parser.add_argument('-l', '--list', nargs='+', help='<Required> Set flag', required=True)
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    args.alg = 'ppo2'
    args.num_timesteps = 0
    args.load_path = 'final'
    args.env = 'BubbleBobble-Nes'

    model, env = train(args, extra_args)
    logger.log("Running trained model")
    del env

    score_cum = 0
    for i in args.list:
        args.gamestate = 'Level{}.state'.format(i)
        env = build_env(args)
        obs = env.reset()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))
        episode_rew = np.zeros(env.num_envs) if isinstance(env, VecEnv) else np.zeros(1)
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)

            obs, rew, done, info = env.step(actions)
            episode_rew += rew
            env.render()
            done_any = done.any() if isinstance(done, np.ndarray) else done
            if done_any:
                for i in np.nonzero(done)[0]:
                    stage = args.gamestate
                    score = info[0]['score'] * 10
                    score_cum += score
                    print('State: {}, Score: {}, Score_cum: {}'.format(stage, score, score_cum))
                    episode_rew[i] = 0
                break
        env.close()
        del env

    return model
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        print("Inside custom run file and about to save model")
        save_path = osp.expanduser(args.save_path)
        print("Model is: ")
        print(model)
        model.save(save_path)
        # print("Let's try messing around with other save formats")
        # models.save_model(model, filepath='./models/testingAlgo')

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))
        episode_rew = np.zeros(env.num_envs) if isinstance(env, VecEnv) else np.zeros(1)
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)
            obs, rew, done, _ = env.step(actions)
            episode_rew += rew
            env.render()
            done_any = done.any() if isinstance(done, np.ndarray) else done
            if done_any:
                for i in np.nonzero(done)[0]:
                    print('episode_rew={}'.format(episode_rew[i]))
                    episode_rew[i] = 0

    env.close()
    return model
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        ckpt = tf.train.Checkpoint(step=model.optimizer.iterations, model=model)
        manager = tf.train.CheckpointManager(ckpt, save_path, max_to_keep=None)
        print('before save, all trainable weights are {}'.format(
            model.train_model.policy_network.trainable_weights))
        # ckpt.save(save_path)
        manager.save()
        # model.save_weights(save_path, save_format='tf')

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))
        episode_rew = 0
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)
            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0
                obs = env.reset()

    env.close()
    return model
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    print("Baselines.run -- configure logger, disable logging in child MPI processes (with rank > 0)")
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        print("Baselines.run -- MPI rank == 0 or None")
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        print("Baselines.run -- MPI rank: ", rank)
        configure_logger(args.log_path, format_strs=[])

    # All execution passes through here
    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))
        episode_rew = 0
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)
            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0
                obs = env.reset()

    env.close()
    return model
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        ckpt = tf.train.Checkpoint(model=model)
        manager = tf.train.CheckpointManager(ckpt, save_path, max_to_keep=None)
        manager.save()

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        if not isinstance(env, VecEnv):
            obs = np.expand_dims(np.array(obs), axis=0)
        state = model.initial_state if hasattr(model, 'initial_state') else None
        episode_rew = np.zeros(env.num_envs) if isinstance(env, VecEnv) else np.zeros(1)
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs)
            else:
                actions, _, _, _ = model.step(obs)

            obs, rew, done, _ = env.step(actions.numpy())
            if not isinstance(env, VecEnv):
                obs = np.expand_dims(np.array(obs), axis=0)
            episode_rew += rew
            env.render()
            done_any = done.any() if isinstance(done, np.ndarray) else done
            if done_any:
                for i in np.nonzero(done)[0]:
                    print('episode_rew={}'.format(episode_rew[i]))
                    episode_rew[i] = 0

    env.close()
    return model
def main(args):
    np.set_printoptions(precision=3)  # default is 8 digits after the decimal point, now 3
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)
    model, env = train(args, extra_args)
    return model
def main(args):
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))
        episode_rew = 0
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)
            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0
                obs = env.reset()

    env.close()
    return model