def main(args):
    """Parse command-line arguments and launch COPOS training."""
    parser = common_arg_parser()
    parsed_args, leftover = parser.parse_known_args(args)
    # Leftover CLI tokens become algorithm kwargs (currently unused here).
    extra_args = parse_cmdline_kwargs(leftover)
    print(parsed_args)
    # parsed_args.env = "MountainCarContinuous-v0"
    train_copos(parsed_args)
def main():
    """Entry point: configure logging, build the grid environment, then
    either train a policy or (with ``--play``) replay a pre-trained one."""
    # NOTE(review): hard-coded Windows log directory — adjust per machine.
    logger.configure(
        'E:\\Project\\Toyota RL\\Toyata 2018\\Toyata RL 4th quarter\\log')
    # 'F:\\GuanYang\\toyota2018_4\\log'
    parser = common_arg_parser()
    parser.add_argument('--load_model_path', default=None)
    parser.set_defaults(num_timesteps=int(2e7))

    args = parser.parse_args()
    # 6-model environment on a 30x30 grid; `pattern` selects model indices.
    env = environment.Env(N=6,
                          pattern=[0, 2, 4, 8, 9, 10],
                          height=30,
                          width=30)

    if not args.play:
        # train the model
        train(env=env,
              num_timesteps=args.num_timesteps,
              load_model_path=args.load_model_path)
    else:
        # construct the model object, load pre-trained model and render
        pi = train(env=env, num_timesteps=1)  # build the graph only
        U.load_state(args.load_model_path)
        ob = env.manualSet(modelList=env.pattern)
        while True:  # deterministic rollout forever; reset on episode end
            action = pi.act(stochastic=False, ob=ob)[0]
            # ob, _, done, _ =  env.step(action)
            ob, rew, done, _ = env.updateEnv(action)
            env.showEnv()
            if done:
                ob = env.manualSet(modelList=env.pattern)
Exemple #3
0
def main():
    """Parse CLI args, train a model, then (with --play) run the trained
    model in an endless render loop."""
    # configure logger, disable logging in child MPI processes (with rank > 0)

    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    # Unknown args become algorithm-specific kwargs, parsed by `parse`.
    extra_args = {
        k: parse(v)
        for k, v in parse_unknown_args(unknown_args).items()
    }

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        # child MPI processes: suppress log output
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, _ = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        env = build_env(args)
        obs = env.reset()
        while True:  # never exits; env is reset when an episode ends
            actions = model.step(obs)[0]
            obs, _, done, _ = env.step(actions)
            env.render()
            # vectorized envs return an array of per-env done flags
            done = done.any() if isinstance(done, np.ndarray) else done

            if done:
                obs = env.reset()
Exemple #4
0
def main(args):
    """Parse args, set up a per-run log/save directory, train, optionally
    replay the trained model (--play), and return it.

    Logging is disabled in child MPI processes (rank > 0).
    """
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)
    if args.log_path is None:  # fixed: identity check instead of `== None`
        # Default: ./logs/<alg>[-iterative][-<name>]/<env>/<timestamp>
        date_str = '{}'.format(
            datetime.datetime.today().strftime('%Y-%m-%d_%H-%M-%S'))
        folder_name = args.alg
        if 'iterative' in extra_args:
            folder_name += '-iterative'
        if args.name is not None:
            folder_name += '-' + args.name
        args.log_path = osp.abspath(
            osp.join('./logs', folder_name, args.env, date_str))
        args.save_path = args.log_path

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    # save opts (ensure the directory exists before writing args.json —
    # configure_logger may not have created it on every rank)
    os.makedirs(args.log_path, exist_ok=True)
    with open(osp.join(args.log_path, 'args.json'), 'w') as fp:
        json.dump(vars(args), fp, indent=1)

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()

        # Recurrent policies carry LSTM state between steps.
        state = model.initial_state if hasattr(model,
                                               'initial_state') else None
        dones = np.zeros((1, ))

        episode_rew = 0
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)

            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0
                obs = env.reset()

    env.close()

    return model
Exemple #5
0
def main(args):
    """Train a model with timestamped log/save directories, optionally
    replay it (--play), and return the trained model.

    Logging is disabled in child MPI processes (rank > 0).
    """
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)
    # =========modify the log/save paths with time=============
    # Fixed: take a single timestamp so log_path and save_path end up in
    # directories with the same name (two separate now() calls differed
    # in their %f microsecond component).
    time = datetime.datetime.now().strftime('%y_%a_%b_%d_%H:%M:%S:%f')
    if args.log_path is not None:
        args.log_path = os.path.join(args.log_path, time)
    if args.save_path is not None:
        args.save_path = os.path.join(args.save_path, time)
    # =====================================================

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])
    model, env = train(args, extra_args)
    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)

        # =========modifiy the save path with time=============
        # save_path_custom = os.path.join(save_path,time)
        # =====================================================
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()

        # from ipdb import set_trace; set_trace()

        # Recurrent policies carry LSTM state between steps.
        state = model.initial_state if hasattr(model,
                                               'initial_state') else None
        dones = np.zeros((1, ))

        episode_rew = 0
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)

            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0
                obs = env.reset()

    env.close()

    return model
Exemple #6
0
def main(args):
    """Train a model, timing the whole run, with optional checkpoint
    resumption via a `load_path` extra argument. Returns the model."""

    start_time = time.time()
    # TODO: restore model and return training
    # load model is simple, but restore loggings will be more difficult
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if "load_path" in extra_args:
        # A bare `--load_path` flag parses to True; resolve it to a real
        # path via get_loading_path (presumably locates an existing run —
        # confirm against its implementation).
        if extra_args["load_path"] is True:
            extra_args["load_path"] = get_loading_path(args, extra_args)
    # ./logs / env + alg / experiments_name
    # [no_staliro, fixed_staliro, randomized_staliro, weighted_queue, variable_start, success_counter]
    os.environ["OPENAI_LOGDIR"] = get_logging_path(args, extra_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        # child MPI processes: suppress log output
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        # Save under the logger directory rather than save_path directly.
        model.save(osp.join(logger.get_dir(), args.save_path, 'model.pkl'))

    print("Elapsed time {}".format(time.time() - start_time))
    return model
Exemple #7
0
def main():
    """Train, then (with --play) run the trained model for a fixed number
    of steps in a freshly built test environment."""
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = parse_cmdline_kwargs(unknown_args)

    # NOTE(review): this env is rebound by train() below without being
    # closed — it appears unused.
    env = build_testenv(args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()
    model, env = train(args, extra_args)
    env.close()

    if args.play:
        logger.log("Running trained model")
        env = build_testenv(args)
        obs = env.reset()

        # NOTE(review): 966 steps with no reset-on-done — presumably one
        # full evaluation trace for this specific test env; confirm.
        for i in range(966):

            actions, _, _, _ = model.step(obs)
            obs, _, done, _ = env.step(actions)

        env.close()
Exemple #8
0
def main():
    """Parse common args plus custom save options and launch training."""
    parser = common_arg_parser()

    ######################################################################
    # MY CUSTOM ARGS

    parser.add_argument('--save-interval',
                        type=int,
                        default=100,
                        help="Interval between saves and stuff")
    parser.add_argument('--output-prefix',
                        required=True,
                        help="Fire prefix of parameter saves")
    # NOTE(review): "Fire prefix" above looks like a typo for "File prefix"
    # (runtime string left unchanged).

    # TODO Disabled for now!!! CPU thing isn't critical though
    # parser.add_argument('--num-cpus', type=int, default=1,
    #                     help="Number of CPU cores to use? Idk...")
    # parser.add_argument('--hidden-dims', type=str, default="64,64",
    #                     help="Within quotes, sizes of each hidden layer "
    #                     + "separated by commas [also, no whitespace]")

    # END CUSTOM ARGS
    ######################################################################

    args = parser.parse_args()
    logger.configure()

    train(num_timesteps=args.num_timesteps,
          seed=args.seed,
          save_interval=args.save_interval,
          output_prefix=args.output_prefix)
Exemple #9
0
def main(args):
    """Train, then (with --play) evaluate the model for 1000 episodes,
    recording env.render()'s per-episode value, and return the model.

    Logging is disabled in child MPI processes (rank > 0).
    """
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)
    env.close()

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        env = build_env(args)
        obs = env.reset()

        state = model.initial_state if hasattr(model,
                                               'initial_state') else None
        dones = np.zeros((1, ))
        # One slot per evaluation episode, filled from env.render() on done.
        cum_flowtime_epi = np.zeros(1000)

        for i_episode in range(1000):
            while True:
                print('--------------------')
                if state is not None:
                    actions, _, state, _ = model.step(obs, S=state, M=dones)
                else:
                    actions, _, _, _ = model.step(obs)

                obs, _, done, _ = env.step(actions)
                print('Actions: {}'.format(actions))
                #env.render()
                done = done.any() if isinstance(done, np.ndarray) else done

                if done:
                    print('Done. Rendering......')
                    # NOTE(review): env.render() here returns the episode's
                    # cumulative flowtime — project-specific env behavior.
                    cum_flowtime_epi[i_episode] = env.render()
                    if i_episode == 999:
                        print(cum_flowtime_epi[i_episode])
                    break

        env.close()

        # write to file — fixed: moved inside the play branch; previously
        # this ran even without --play, where cum_flowtime_epi is undefined
        # (NameError).
        # np.savetxt('data_normal.txt', cum_flowtime_epi) # Used to plot a2c on single link/path under synthetic data
        np.savetxt(
            'data_normal_no_training.txt', cum_flowtime_epi
        )  # Used to plot a2c without pre-training on single link/path under synthetic data

    return model
Exemple #10
0
def main():
    """Load a pickled model factory plus checkpoint weights and run the
    trained policy until every sub-environment reports done."""
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = {
        k: parse(v)
        for k, v in parse_unknown_args(unknown_args).items()
    }

    # SECURITY: pickle.load executes arbitrary code on load — only use
    # model files from trusted sources.
    # Fixed: context manager so the file handle is always closed.
    with open("./tmp/make_model.pkl", "rb") as pickle_in:
        # pickle_in = open("./tmp/my_model","rb")
        make_model = pickle.load(pickle_in)
    model = make_model()
    model.load("./tmp/my_model")  #can use checkpoints

    logger.log("Running trained model")
    env = build_env(args)
    obs = env.reset()
    # print(obs)
    while True:
        # 0th element is the actions array; other returns vary per
        # algorithm (need to check for ppo).
        actions = model.step(obs)[0]
        obs, _, done, _ = env.step(actions)
        # env.render()
        # done = done.any() if isinstance(done, np.ndarray) else done
        done = done.all() if isinstance(done, np.ndarray) else done
        print("step")
        if done:
            break
Exemple #11
0
def main(args):
    """Train, then (with --play) run the trained model while feeding a
    rotation angle computed from the env's camera image back into it."""
    # configure logger, disable logging in child MPI processes (with rank > 0)

    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()

        state = model.initial_state if hasattr(model,
                                               'initial_state') else None
        dones = np.zeros((1, ))

        # Per-env reward accumulator (one slot per env for VecEnv).
        episode_rew = np.zeros(env.num_envs) if isinstance(
            env, VecEnv) else np.zeros(1)
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)

            # NOTE(review): assumes env.get_image() returns a list/sequence
            # of frames (or something falsy) — truthiness of a bare numpy
            # array here would raise; confirm.
            image = env.get_image()
            if (image):
                # print(image[0])
                img2 = np.array(image[0])
                # Estimate the object's angle from the frame and apply it.
                angel = getRect(image[0])

                env.set_rotation(angel)

                # cv2.imshow('frame',image[0])

                # cv2.waitKey(0)

            obs, rew, done, _ = env.step(actions)
            episode_rew += rew
            env.render()
            done_any = done.any() if isinstance(done, np.ndarray) else done
            if done_any:
                for i in np.nonzero(done)[0]:
                    print('episode_rew={}'.format(episode_rew[i]))
                    episode_rew[i] = 0

    env.close()

    return model
Exemple #12
0
def run():
    """Build and return a pre-trained PPO2 racecar model using a
    hard-coded argv (num_timesteps=0 means no actual training)."""
    # configure logger, disable logging in child MPI processes (with rank > 0)

    #print('enter main function')
    # Hard-coded CLI: load a saved checkpoint and request --play mode.
    args = [
        'run.py', '--alg=ppo2', '--env=RacecarBulletEnv-v0',
        '--num_timesteps=0',
        '--load_path=/Users/huangyixuan/models/racecar_ppo2', '--play'
    ]
    print(args)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    print('unknown_args')
    print(unknown_args)
    extra_args = parse_cmdline_kwargs(unknown_args)
    print('extra')
    print(extra_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        #configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    # NOTE(review): despite '--play' in argv, no play loop runs here —
    # the model is simply built/loaded and returned.
    model, env = train(args, extra_args)
    return model
Exemple #13
0
def main():
    """Replay a pre-trained YamaX walking policy (no training)."""
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = parse_cmdline_kwargs(unknown_args)

    # Force play mode on a fixed env; num_timesteps=0 builds the model
    # without training so train() can just load a checkpoint.
    args.num_timesteps = 0
    args.play = True
    args.env = 'YamaXRealForwardWalk-v0'

    model, env = train(args, extra_args)
    env.close()

    env = build_env(args)
    obs = env.reset()

    def initialize_placeholders(nlstm=128, **kwargs):
        # Zero LSTM state (2*nlstm = cell + hidden) and one done flag.
        return np.zeros((args.num_env or 1, 2 * nlstm)), np.zeros((1))

    state, dones = initialize_placeholders(**extra_args)
    while True:
        actions, _, state, _ = model.step(obs, S=state, M=dones)
        obs, _, done, _ = env.step(actions)
        env.render()
        done = done.any() if isinstance(done, np.ndarray) else done

        if done:
            obs = env.reset()

    # NOTE(review): unreachable — the while-True loop above never breaks.
    env.close()
Exemple #14
0
def main(args):
    """Train (or, in --play mode, just build) a model, optionally replay
    it with a configurable per-step sleep, and return the model."""
    # configure logger, disable logging in child MPI processes (with rank > 0)

    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        # import time
        rank = 0
        # if args.log_path:
        #     args.log_path = osp.join(args.log_path, time.strftime("%Y-%m-%d-%H-%M-%S"))
        args.log_path = configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    if args.play:
        # Play mode: skip training and use a single environment.
        args.num_timesteps = 0
        args.num_env = 1

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()

        # Recurrent policies carry LSTM state between steps.
        state = model.initial_state if hasattr(model,
                                               'initial_state') else None

        dones = np.zeros((1, ))

        episode_rew = np.zeros(env.num_envs) if isinstance(
            env, VecEnv) else np.zeros(1)
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)
            # Some models return a list of per-agent action arrays.
            if isinstance(actions, list):
                actions = actions[0]
            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            # Slow the rollout down for visualization if requested.
            if args.vis_sleep > 0:
                import time
                time.sleep(args.vis_sleep)
            # print(f"gc:{obs['observation'][...,3:]}")
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0
    env.close()

    return model
Exemple #15
0
def main(args):
    """Train with TF2-style checkpointing under a timestamped results
    directory, optionally replay (--play), and return the model.

    Logging is disabled in child MPI processes (rank > 0).
    """
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)
    time_now = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    log_path = './results/{alg}/experiment-{time}'.format(alg=args.alg, time=time_now)
    os.makedirs(log_path)
    args.log_path = log_path
    # Persist the full run configuration alongside the logs.
    with open(args.log_path + '/config.json', 'w', encoding='utf-8') as f:
        json.dump(vars(args), f, ensure_ascii=False, indent=4)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        ckpt = tf.train.Checkpoint(model=model)
        manager = tf.train.CheckpointManager(ckpt, save_path, max_to_keep=None)
        manager.save()

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        # Non-vectorized envs need a leading batch dimension for the model.
        if not isinstance(env, VecEnv):
            obs = np.expand_dims(np.array(obs), axis=0)

        state = model.initial_state if hasattr(model, 'initial_state') else None

        episode_rew = np.zeros(env.num_envs) if isinstance(env, VecEnv) else np.zeros(1)
        while True:
            # NOTE(review): the recurrent branch does not pass `state` back
            # into model.step (no S=/M= kwargs, unlike the TF1 variants in
            # this file) — confirm against the TF2 model.step signature.
            if state is not None:
                actions, _, state, _ = model.step(obs)
            else:
                # fixed: normalized the inconsistent 2-space indentation here
                actions, _, _, _ = model.step(obs)

            obs, rew, done, _ = env.step(actions.numpy())
            if not isinstance(env, VecEnv):
                obs = np.expand_dims(np.array(obs), axis=0)
            episode_rew += rew
            env.render()
            done_any = done.any() if isinstance(done, np.ndarray) else done
            if done_any:
                for i in np.nonzero(done)[0]:
                    print('episode_rew={}'.format(episode_rew[i]))
                    episode_rew[i] = 0

    env.close()

    return model
Exemple #16
0
def main(args):
    """Train (importing an optional extra module first), save, optionally
    replay the trained model (--play), and return it."""
    # configure logger, disable logging in child MPI processes (with rank > 0)

    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    # Allow e.g. custom env registration via --extra_import.
    if args.extra_import is not None:
        import_module(args.extra_import)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    # If argument indicate training to be done:
    model, env = train(args, extra_args)
    env.close()

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)
        # NOTE(review): `saver` is created but never used — the saver.save
        # call below is commented out.
        saver = tf.train.Saver()

        #logger.info("saving the trained model")
        #start_time_save = time.time()
        #saver.save(sess, save_path + "ddpg_test_model")
        #logger.info('runtime saving: {}s'.format(time.time() - start_time_save))

    # If it is a test run on the learned model
    if args.play:
        logger.log("Running trained model")
        env = build_env(args)
        obs = env.reset()

        state = model.initial_state if hasattr(model,
                                               'initial_state') else None
        dones = np.zeros((1, ))

        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)

            obs, _, done, _ = env.step(actions)
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done

            if done:
                obs = env.reset()

        # NOTE(review): unreachable — the while-True loop never breaks.
        env.close()

    return model
Exemple #17
0
def main(args):
    """Train a (two-sided) model or replay the latest checkpoint; in play
    mode the second agent's action slot is zero-filled."""
    # configure logger, disable logging in child MPI processes (with rank > 0)

    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)
    #import_module(args.custom_env_module)
    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])
    if args.play:
        # Play mode: no training steps, single environment.
        args.num_timesteps = 0
        args.num_env = 1
    model, env = train(args, extra_args)


    if args.play:
        logger.log("Running trained model")
        # Load the last checkpoint listed in the checkpoints directory.
        # NOTE(review): relies on os.listdir's ordering — only "latest" if
        # checkpoint names sort chronologically; confirm.
        checkdir = osp.join(logger.get_dir(), 'checkpoints')
        paths = os.listdir(checkdir)
        print(f"loading model: {paths[-1]}")
        model.load(osp.join(checkdir, paths[-1]))
        obs = env.reset()

        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))

        episode_rew = np.zeros(env.num_envs) if isinstance(env, VecEnv) else np.zeros(1)


        while True:
            # Only the first side's observation drives the learned policy.
            if state is not None:
                actions, _, state, _ = model.step(obs[0],S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs[0])
            #Set interact side to always be 1
            placeholder_action = np.zeros_like(actions)
            actions = np.concatenate([actions,placeholder_action], axis=0)
            obs, rew, done, _ = env.step(actions, play=True)
            episode_rew += rew
            # env.render()
            done_any = done.any() if isinstance(done, np.ndarray) else done
            if done_any:
                for i in np.nonzero(done)[0]:
                    print('episode_rew={}'.format(episode_rew[i]))
                    episode_rew[i] = 0
    else:
        if args.save_path is not None and rank == 0:
            save_path = osp.expanduser(args.save_path)
            model.save(save_path)

    env.close()

    return model
Exemple #18
0
def main(args):
    """Train, save, and optionally replay (--play, with a 3s pause per
    rendered step) a model; returns the trained model."""
    # configure logger, disable logging in child MPI processes (with rank > 0)

    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)
    #print("\n \n \n \n \n HI1 \n \n \n \n \n")

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    # NOTE(review): this save_path is recomputed identically after training
    # below — this first computation is redundant.
    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)

    #print("\n \n \n \n \n HI2 \n \n \n \n \n")
    model, env = train(args, extra_args)
    #print("\n \n \n \n \n HI3 \n \n \n \n \n")

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    #print("\n \n \n \n \n HI4 \n \n \n \n \n")
    if args.play:
        logger.log("Running trained model")
        obs = env.reset()

        # Recurrent policies carry LSTM state between steps.
        state = model.initial_state if hasattr(model,
                                               'initial_state') else None
        dones = np.zeros((1, ))

        episode_rew = 0
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)

            #print("\n \n \n \n \n HI1 \n \n \n \n \n")

            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            # Pause so each rendered frame stays visible.
            time.sleep(3)
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0
                obs = env.reset()

    env.close()

    return model
def main(args):
    """Train with project-config-driven (optionally wandb) logging, save a
    checkpoint to the config's trained_directory, optionally replay."""
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    arg_parser = common.arguments.get_parser(arg_parser)
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)
    configs = common.config.get_config(args.env, args.experiment_name)

    # Checkpoints always go to the config's trained_directory.
    args.save_path = os.path.join(configs.trained_directory, 'model.ckpt')

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
    else:
        rank = MPI.COMM_WORLD.Get_rank()

    # setup my logger and baselines' logger
    # NOTE(review): this local `logger` shadows any module-level logger.
    logger = common.config.setup_logger(args.verbose, args.model_name, configs.log_directory)

    # setup wandb
    logger_formats = ['stdout', 'log', 'csv']
    if args.use_wandb:
        logger_formats.append('wandb')
    baselines.logger.configure(configs.model_path, logger_formats, **vars(args))

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()

        # Recurrent policies carry LSTM state between steps.
        state = model.initial_state if hasattr(model, 'initial_state') else None
        dones = np.zeros((1,))

        episode_rew = 0
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)

            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0
                obs = env.reset()

    env.close()

    return model
Exemple #20
0
def main(args):
    """Train a multi-goal model; in play mode roll out while advancing the
    goal index whenever the current goal's reward signals success."""
    # configure logger, disable logging in child MPI processes (with rank > 0)

    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()

        state = model.initial_state if hasattr(model,
                                               'initial_state') else None
        dones = np.zeros((1, ))

        episode_rew = np.zeros(env.num_envs) if isinstance(
            env, VecEnv) else np.zeros(1)
        # Walk through the model's goals in order; restart at 0 per episode.
        goal_ind = 0
        num_goals = model.num_goals
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs,
                                                  goal_ind,
                                                  S=state,
                                                  M=dones)
            else:
                actions, _, _, _ = model.step(obs, goal_ind)

            obs, rew, done, _ = env.step(actions)
            # NOTE(review): assumes a scalar reward where -1 means "goal
            # not yet reached" — an array reward in this `if` would raise;
            # confirm against the env.
            if rew != -1 and goal_ind < num_goals - 1:
                goal_ind += 1
            episode_rew += rew
            env.render()
            done_any = done.any() if isinstance(done, np.ndarray) else done
            if done_any:
                for i in np.nonzero(done)[0]:
                    print('episode_rew={}'.format(episode_rew[i]))
                    episode_rew[i] = 0
                    goal_ind = 0

    env.close()

    return model
Exemple #21
0
def main(args):
    """Validate --id/--config experiment options, configure logging, and
    launch training; returns immediately after train()."""
    # configure logger, disable logging in child MPI processes (with rank > 0)
    np.set_printoptions(precision=3)

    arg_parser = common_arg_parser()
    arg_parser.add_argument('--id',
                            help='name of the experiment for saving',
                            type=str,
                            default=None)
    arg_parser.add_argument('--config',
                            help='path to the algorithm config',
                            type=str,
                            default=None)
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    # Both --id and --config are mandatory for this entry point.
    if args.id is None:
        print('Please, specify the name of the experiment in --id')
        exit(0)

    if args.config is None:
        print('Please, specify the path to the algorithm config via --config')
        exit(0)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    train(args, extra_args)
    return

    # NOTE(review): everything below is unreachable (dead code after the
    # early return above) and references `model`, which is never defined
    # in this function.
    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        env = DotaEnvironment()
        obs = env.reset()

        def initialize_placeholders(nlstm=128, **kwargs):
            return np.zeros((args.num_env or 1, 2 * nlstm)), np.zeros((1))

        state, dones = initialize_placeholders(**extra_args)
        while True:
            actions, _, state, _ = model.step(obs, S=state, M=dones)
            obs, _, done, _ = env.step(actions)
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done

            if done:
                obs = env.reset()
        env.close()
Exemple #22
0
def main(args):
    """Train a model, dump its greedy per-state policy, then (with --play)
    evaluate the mean reward over 100 episodes.

    Returns the trained model.
    """
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()

        state = model.initial_state if hasattr(model,
                                               'initial_state') else None
        dones = np.zeros((1, ))

        # Dump the greedy action for every discrete observation.
        # NOTE(review): assumes env.observation_space is discrete (has .n);
        # state 0 is skipped, presumably a terminal/start state -- confirm.
        pi = []
        for s in range(1, env.observation_space.n):  # was `obs`: shadowed the rollout obs
            actions, _, _, _ = model.step([s])
            pi.append(actions[0])
        print(pi)
        obs = env.reset()
        episode_rew, cnt, sum_reward = 0, 0, 0
        while cnt < 100:  # average over 100 completed episodes
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)

            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                sum_reward += episode_rew
                episode_rew = 0
                cnt += 1
                obs = env.reset()
        # Fixed typo: message previously read 'men_reward'.
        print('mean_reward={}'.format(sum_reward / cnt))

    env.close()

    return model
Exemple #23
0
def main(args):
    """Train a model, record the run configuration to args.json, optionally
    save the model, and optionally replay it with rendering.

    Returns the trained model.
    """
    parser = common_arg_parser()
    args, unknown_args = parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    # Persist the full run configuration (plus git revision) next to the logs
    # so the experiment is reproducible.
    with open(os.path.join(logger.get_dir(), 'args.json'), 'w') as arg_file:
        run_config = vars(args).copy()
        run_config.update(extra_args)
        import subprocess
        run_config['git_commit'] = subprocess.check_output(
            ["git", "describe", "--always"]).strip().decode("utf-8")
        json.dump(run_config, arg_file)

    # Full logging only on rank 0; child MPI workers stay silent.
    if MPI is not None and MPI.COMM_WORLD.Get_rank() != 0:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()
    else:
        rank = 0
        logger.configure()

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        model.save(osp.expanduser(args.save_path))

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()

        state = getattr(model, 'initial_state', None)
        dones = np.zeros((1, ))

        episode_rew = 0
        while True:
            if state is None:
                actions, _, _, _ = model.step(obs)
            else:
                actions, _, state, _ = model.step(obs, S=state, M=dones)

            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            if isinstance(done, np.ndarray):
                done = done.any()
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0
                obs = env.reset()

    env.close()

    return model
Exemple #24
0
def main(args):
    """Train a policy, then (with --play) roll it out for 10000 steps while
    collecting observations, actions and completed-episode rewards, finally
    printing the mean episode reward."""
    observations_data = []
    actions_data = []
    episode_rewards = []
    parser = common_arg_parser()
    args, unknown_args = parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    # Only rank 0 logs in full; child MPI workers log nothing.
    if MPI is not None and MPI.COMM_WORLD.Get_rank() != 0:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])
    else:
        rank = 0
        configure_logger(args.log_path)

    model, env = train(args, extra_args)

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        print(env.observation_space)
        print(env.action_space)
        vectorised = isinstance(env, VecEnv)
        if not vectorised:
            # Single env: model.step expects a leading batch axis.
            obs = np.expand_dims(np.array(obs), axis=0)

        state = getattr(model, 'initial_state', None)

        episode_rew = np.zeros(env.num_envs) if vectorised else np.zeros(1)
        for n in range(10000):
            # NOTE: the recurrent branch intentionally does not pass S/M here
            # (TF2-style model.step), matching the original behavior.
            if state is None:
                actions, _, _, _ = model.step(obs)
            else:
                actions, _, state, _ = model.step(obs)

            obs, rew, done, _ = env.step(actions.numpy())
            observations_data.append(obs)
            actions_data.append(actions.numpy())

            if not vectorised:
                obs = np.expand_dims(np.array(obs), axis=0)
            episode_rew += rew
            env.render()
            finished = done.any() if isinstance(done, np.ndarray) else done
            if finished:
                for idx in np.nonzero(done)[0]:
                    print('episode_rew={}'.format(episode_rew[idx]))
                    episode_rewards.append(episode_rew[idx])
                    episode_rew[idx] = 0
            if n % 1000 == 0:
                print(n)
        print(np.mean(episode_rewards))
    env.close()
Exemple #25
0
def main(args):
    """Evaluate a pre-trained PPO2 'BubbleBobble-Nes' model on each level id
    supplied via -l/--list, printing per-level and cumulative scores.

    Returns the loaded model.
    """
    arg_parser = common_arg_parser()
    arg_parser.add_argument('-l',
                            '--list',
                            nargs='+',
                            help='<Required> Set flag',
                            required=True)

    args, unknown_args = arg_parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)
    # Hard-coded evaluation setup: load the 'final' checkpoint, skip training.
    args.alg = 'ppo2'
    args.num_timesteps = 0
    args.load_path = 'final'
    args.env = 'BubbleBobble-Nes'
    model, env = train(args, extra_args)
    logger.log("Running trained model")
    del env
    score_cum = 0
    # Renamed from `i`: the inner per-env loop reused the same name, shadowing
    # the level id within the loop body.
    for level in args.list:

        args.gamestate = 'Level{}.state'.format(level)
        env = build_env(args)
        obs = env.reset()

        state = model.initial_state if hasattr(model,
                                               'initial_state') else None
        dones = np.zeros((1, ))

        episode_rew = np.zeros(env.num_envs) if isinstance(
            env, VecEnv) else np.zeros(1)
        while True:
            if state is not None:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
            else:
                actions, _, _, _ = model.step(obs)

            obs, rew, done, info = env.step(actions)
            episode_rew += rew
            env.render()
            done_any = done.any() if isinstance(done, np.ndarray) else done
            if done_any:
                for env_idx in np.nonzero(done)[0]:
                    stage = args.gamestate
                    score = info[0]['score'] * 10
                    score_cum += score
                    print('State: {}, Score: {}, Score_cum: {}'.format(
                        stage, score, score_cum))
                    episode_rew[env_idx] = 0
                break  # one episode per level
        env.close()
        del env

    return model
def main(args):
    """Parse options, train, optionally persist the model (with debug prints)
    and optionally replay the trained policy.

    Returns the trained model.
    """
    parser = common_arg_parser()
    args, unknown_args = parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    # Full logging only on rank 0; silent child MPI workers.
    if MPI is not None and MPI.COMM_WORLD.Get_rank() != 0:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])
    else:
        rank = 0
        configure_logger(args.log_path)

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        print("Inside custom run file and about to save model")
        print("Model is: ")
        print(model)
        model.save(osp.expanduser(args.save_path))

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()

        state = getattr(model, 'initial_state', None)
        dones = np.zeros((1, ))

        episode_rew = np.zeros(env.num_envs) if isinstance(
            env, VecEnv) else np.zeros(1)
        while True:
            if state is None:
                actions, _, _, _ = model.step(obs)
            else:
                actions, _, state, _ = model.step(obs, S=state, M=dones)

            obs, rew, done, _ = env.step(actions)
            episode_rew += rew
            env.render()
            finished = done.any() if isinstance(done, np.ndarray) else done
            if finished:
                for idx in np.nonzero(done)[0]:
                    print('episode_rew={}'.format(episode_rew[idx]))
                    episode_rew[idx] = 0

    env.close()

    return model
Exemple #27
0
def main(args):
    """Train a model, checkpoint it via tf.train.CheckpointManager, and
    optionally replay the trained policy with rendering.

    Returns the trained model.
    """
    parser = common_arg_parser()
    args, unknown_args = parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    # Disable logging in child MPI processes (rank > 0).
    if MPI is not None and MPI.COMM_WORLD.Get_rank() != 0:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()
    else:
        rank = 0
        logger.configure()

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        # Track the optimizer step so a restore resumes at the right iteration.
        ckpt = tf.train.Checkpoint(step=model.optimizer.iterations,
                                   model=model)
        manager = tf.train.CheckpointManager(ckpt, save_path, max_to_keep=None)
        print('before save, all trainable weights are {}'.format(
            model.train_model.policy_network.trainable_weights))
        manager.save()

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()

        state = getattr(model, 'initial_state', None)
        dones = np.zeros((1, ))

        episode_rew = 0
        while True:
            if state is None:
                actions, _, _, _ = model.step(obs)
            else:
                actions, _, state, _ = model.step(obs, S=state, M=dones)

            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            if isinstance(done, np.ndarray):
                done = done.any()
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0
                obs = env.reset()

    env.close()

    return model
def main(args):
    """Parse args, configure the MPI-aware logger (with debug prints), train,
    then optionally save and replay the trained model.

    Returns the trained model.
    """
    print(
        "Baselines.run -- configure logger, disable logging in child MPI processes (with rank > 0)"
    )

    parser = common_arg_parser()
    args, unknown_args = parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        print("Baselines.run -- MPI rank == 0 or None")
        rank = 0
        configure_logger(args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        print("Baselines.run -- MPI rank: ", rank)
        configure_logger(args.log_path, format_strs=[])

    # All execution passes through here
    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        model.save(osp.expanduser(args.save_path))

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()

        state = getattr(model, 'initial_state', None)
        dones = np.zeros((1, ))

        total_rew = 0
        while True:
            if state is None:
                actions, _, _, _ = model.step(obs)
            else:
                actions, _, state, _ = model.step(obs, S=state, M=dones)

            obs, rew, done, _ = env.step(actions)
            total_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            if isinstance(done, np.ndarray):
                done = done.any()
            if done:
                print('episode_rew={}'.format(total_rew))
                total_rew = 0
                obs = env.reset()

    env.close()

    return model
Exemple #29
0
def main(args):
    """Train, checkpoint the model with tf.train.CheckpointManager, and
    optionally replay it (TF2-style: actions are tensors, converted with
    .numpy() before stepping the env).

    Returns the trained model.
    """
    parser = common_arg_parser()
    args, unknown_args = parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    # Full logging only on rank 0; silent child MPI workers.
    if MPI is not None and MPI.COMM_WORLD.Get_rank() != 0:
        rank = MPI.COMM_WORLD.Get_rank()
        configure_logger(args.log_path, format_strs=[])
    else:
        rank = 0
        configure_logger(args.log_path)

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        ckpt = tf.train.Checkpoint(model=model)
        manager = tf.train.CheckpointManager(ckpt, save_path, max_to_keep=None)
        manager.save()

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()
        vectorised = isinstance(env, VecEnv)
        if not vectorised:
            # Single env: add a leading batch axis for model.step.
            obs = np.expand_dims(np.array(obs), axis=0)

        state = getattr(model, 'initial_state', None)

        episode_rew = np.zeros(env.num_envs) if vectorised else np.zeros(1)
        while True:
            # NOTE: the recurrent branch intentionally omits S/M (TF2-style
            # model.step), matching the original behavior.
            if state is None:
                actions, _, _, _ = model.step(obs)
            else:
                actions, _, state, _ = model.step(obs)

            obs, rew, done, _ = env.step(actions.numpy())
            if not vectorised:
                obs = np.expand_dims(np.array(obs), axis=0)
            episode_rew += rew
            env.render()
            finished = done.any() if isinstance(done, np.ndarray) else done
            if finished:
                for idx in np.nonzero(done)[0]:
                    print('episode_rew={}'.format(episode_rew[idx]))
                    episode_rew[idx] = 0

    env.close()

    return model
Exemple #30
0
def main(args):
    """Parse CLI options, train, and return the trained model."""
    # Print floats with 3 decimals instead of NumPy's default 8.
    np.set_printoptions(precision=3)

    parser = common_arg_parser()

    args, unknown_args = parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    model, env = train(args, extra_args)
    return model
Exemple #31
0
def main(args):
    """Parse options, configure the MPI-aware logger, train, and optionally
    save and replay the trained model.

    Returns the trained model.
    """
    parser = common_arg_parser()
    args, unknown_args = parser.parse_known_args(args)
    extra_args = parse_cmdline_kwargs(unknown_args)

    # Disable logging in child MPI processes (rank > 0).
    if MPI is not None and MPI.COMM_WORLD.Get_rank() != 0:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()
    else:
        rank = 0
        logger.configure()

    model, env = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        model.save(osp.expanduser(args.save_path))

    if args.play:
        logger.log("Running trained model")
        obs = env.reset()

        state = getattr(model, 'initial_state', None)
        dones = np.zeros((1,))

        episode_rew = 0
        while True:
            if state is None:
                actions, _, _, _ = model.step(obs)
            else:
                actions, _, state, _ = model.step(obs, S=state, M=dones)

            obs, rew, done, _ = env.step(actions)
            episode_rew += rew[0] if isinstance(env, VecEnv) else rew
            env.render()
            if isinstance(done, np.ndarray):
                done = done.any()
            if done:
                print('episode_rew={}'.format(episode_rew))
                episode_rew = 0
                obs = env.reset()

    env.close()

    return model