Example #1
def main():
    # configure logger, disable logging in child MPI processes (with rank > 0)

    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = {
        k: parse(v)
        for k, v in parse_unknown_args(unknown_args).items()
    }

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, _ = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        env = build_env(args)
        obs = env.reset()
        while True:
            actions = model.step(obs)[0]
            obs, _, done, _ = env.step(actions)
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done

            if done:
                obs = env.reset()
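Note: all of these snippets presuppose the module-level imports of baselines' run.py (train, build_env, and a parse helper like the one inside Example #4's parse_cmdline_kwargs live in that same file). A minimal header that would make Example #1 self-contained looks roughly like this, including the graceful fallback baselines uses when mpi4py is absent:

import os.path as osp
import numpy as np

from baselines import logger
from baselines.common.cmd_util import common_arg_parser, parse_unknown_args

try:
    from mpi4py import MPI  # MPI is optional; the rank-0 checks degrade to single-process
except ImportError:
    MPI = None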
Example #2
def main():
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = {
        k: parse(v)
        for k, v in parse_unknown_args(unknown_args).items()
    }

    # pickle_in = open("./tmp/my_model", "rb")
    with open("./tmp/make_model.pkl", "rb") as pickle_in:
        make_model = pickle.load(pickle_in)
    model = make_model()
    model.load("./tmp/my_model")  # can use checkpoints

    logger.log("Running trained model")
    env = build_env(args)
    obs = env.reset()
    # print(obs)
    while True:
        actions = model.step(obs)[0]  # 0th element is the actions; step also returns a few more arrays (check for ppo)
        obs, _, done, _ = env.step(actions)
        # env.render()
        # use .all() to wait for every vectorized env; .any() would stop on the first env to finish
        done = done.all() if isinstance(done, np.ndarray) else done
        print("step")
        if done:
            break
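Example #2 depends on the checkpointing of older ppo2 versions: when called with save_interval > 0, learn() cloudpickled a make_model factory and wrote numbered checkpoints under the logger directory. A rough sketch of that saving side (the paths are illustrative, not taken from this snippet):

import cloudpickle
import os.path as osp

# roughly what older ppo2.learn() did when save_interval > 0:
with open(osp.join(logger.get_dir(), 'make_model.pkl'), 'wb') as fh:
    fh.write(cloudpickle.dumps(make_model))
# ...and periodically during training:
model.save(osp.join(logger.get_dir(), 'checkpoints', '%.5i' % update))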
Example #3
def main():
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = {
        k: run.parse(v)
        for k, v in parse_unknown_args(unknown_args).items()
    }

    train(args, extra_args)
Example #4
def parse_cmdline_kwargs(args):
    '''
    convert a list of '='-spaced command-line arguments to a dictionary, evaluating python objects when possible
    '''
    def parse(v):
        assert isinstance(v, str)
        try:
            return eval(v)
        except (NameError, SyntaxError):
            return v

    return {k: parse(v) for k, v in parse_unknown_args(args).items()}
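For reference, this is how the parser behaves on a typical command line: values that eval to Python literals become objects, while everything else stays a string (the flags below are invented for illustration):

extra = parse_cmdline_kwargs(['--lr=0.001', '--network=mlp', '--layers=[64,64]'])
# -> {'lr': 0.001, 'network': 'mlp', 'layers': [64, 64]}
# '0.001' and '[64,64]' eval cleanly; 'mlp' raises NameError and is kept as a string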
Example #5
def main():
    # configure logger, disable logging in child MPI processes (with rank > 0)

    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = parse_cmdline_kwargs(unknown_args)
    unknown_a = parse_unknown_args(unknown_args)
    print('args')
    print(args)

    # The parser does not seem to accept new arguments, so I parse custom arguments here.
    print('extra_args')
    print(extra_args)
    if 'progress_dir' in extra_args:
        del extra_args['progress_dir']
        print('Deleted progress_dir. New extra_args:')
        print(extra_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        if 'progress_dir' in unknown_a:
            logger.configure(dir=unknown_a['progress_dir'])
        else:
            logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)
    env.close()

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        env = build_env(args)
        obs = env.reset()
        def initialize_placeholders(nlstm=128,**kwargs):
            return np.zeros((args.num_env or 1, 2*nlstm)), np.zeros((1))
        state, dones = initialize_placeholders(**extra_args)
        while True:
            actions, _, state, _ = model.step(obs,S=state, M=dones)
            obs, _, done, _ = env.step(actions)
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done

            if done:
                obs = env.reset()

        env.close()
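The initialize_placeholders workaround above exists because a recurrent (LSTM) policy's step needs a state tensor S and a done-mask M, while a feed-forward policy can be stepped with model.step(obs) alone. Stripped of the kwargs plumbing, the recurrent play loop reduces to roughly this (nlstm=128 is the baselines default, and the four return values match ppo2's step):

state = np.zeros((args.num_env or 1, 2 * 128))  # concatenated cell and hidden state per env
dones = np.zeros((1,))                          # "episode just started" mask
while True:
    actions, values, state, neglogpacs = model.step(obs, S=state, M=dones)
    obs, _, done, _ = env.step(actions)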
Example #6
def parse_cmdline_kwargs(args):
    '''
    convert a list of '='-spaced command-line arguments to a dictionary, evaluating python objects when possible
    '''
    def parse(v):
        assert isinstance(v, str)
        try:
            return eval(v)
        except (NameError, SyntaxError):
            return v

    return {k: parse(v) for k, v in parse_unknown_args(args).items()}
Example #7
def main():
    # configure logger, disable logging in child MPI processes (with rank > 0)
    dir = "test2"
    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = {
        k: parse(v)
        for k, v in parse_unknown_args(unknown_args).items()
    }

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure(log_dir)
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    args.play = True
    args.num_env = 3
    #args.nsteps = 512
    #print("ARGS IS ", args)
    #print("extra_args IS ", extra_args)
    print("")
    print("RUNNING CORRECTLY")
    model, _ = train(args, extra_args)

    #args.save_path = "/Users/romc/Documents/RNN_exploation_learning/baselines/test/checkpoints/0001"

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        # init act model
        logger.log("Running trained model")
        env = build_env(args)
        obs = env.reset()
        s = model.initial_state
        #print("initial state ", str(s))
        m = [False for _ in range(1)]
        print("obs is what " + str(obs))
        step = 0
        done = False
        while not done:
            step += 1
            print(" ")
            print(" STEP " + str(step))
            out = model.step(obs, S=s, M=m)
            #print("out "+str(out))
            #print("out[0] "+str(out[0]))
            #print("out[1] "+str(out[1]))
            #print("out[2] "+str(out[2]))
            s = out[2]
            m = out[3]
            actions = out[0]

            print("actions " + str(actions))
            print(type(actions[0]))

            #if step <5:
            #    continue

            obs, _, done, _ = env.step(actions)
            print(obs)
            print(done)

            done = done.any() if isinstance(done, np.ndarray) else done

            if done:
                obs = env.reset()
Example #8
def main():
    # configure logger, disable logging in child MPI processes (with rank > 0)

    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = parse_cmdline_kwargs(unknown_args)
    unknown_a = parse_unknown_args(unknown_args)
    print('args')
    print(args)

    # The parser does not seem to accept new arguments, so I parse custom arguments here.
    print('extra_args')
    print(extra_args)
    if 'progress_dir' in extra_args:
        del extra_args['progress_dir']
        print('Deleted progress_dir. New extra_args:')
        print(extra_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        if 'progress_dir' in unknown_a:
            logger.configure(dir=unknown_a['progress_dir'])
        else:
            logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)

    #saver = tf.train.Saver()
    #saver.save(model.sess, 'results/launch_balancer_04.sh/sess')

    # #######
    # logger.log("Test model")
    # obs = env.reset()
    # def initialize_placeholders(nlstm=128,**kwargs):
    #     return np.zeros((args.num_env or 1, 2*nlstm)), np.zeros((1))
    # state, dones = initialize_placeholders(**extra_args)
    #
    # actions, _, state, _ = model.step(obs,S=state, M=dones)
    # obs, _, done, _ = env.step(actions)
    #
    # actions, _, state, _ = model.step(obs,S=state, M=dones)
    # obs, _, done, _ = env.step(actions)
    #
    # print("Observations: ", obs)
    # print("Action: ", actions)
    # env.render()
    # done = done.any() if isinstance(done, np.ndarray) else done
    #
    # if done:
    #     obs = env.reset()
    #
    # #####
    # env.close()
    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        # env = build_env(args)
        obs = env.reset()

        def initialize_placeholders(nlstm=128, **kwargs):
            return np.zeros((args.num_env or 1, 2 * nlstm)), np.zeros((1))

        state, dones = initialize_placeholders(**extra_args)

        #####
        #import numpy
        #from servorobots.network.agent_mlp import AgentMLP
        #agent = AgentMLP("results/launch_balancer_04.sh/weight.weights", tf.nn.tanh)
        with tf.Session() as sess:
            #####
            print(model.act_model)
            while True:
                actions, _, state, _ = model.step(obs, S=state, M=dones)
                # writer = tf.summary.FileWriter("results/launch_balancer_04.sh/grahp", sess.graph)

                #print("From checkpoint: ", actions1)

                #states = numpy.reshape([0,0,0,0,0,0,0,0], 8)
                #actions = agent.act(sess, numpy.reshape(states, 8))
                #print("From weight file: ", actions)

                obs, _, done, _ = env.step(actions)
                print('Observation: ' + str(obs[0][3]) + ' Actions: ' +
                      str(actions))
                env.render()
                done = done.any() if isinstance(done, np.ndarray) else done

                if done:
                    obs = env.reset()

        env.close()
Example #9
def main():
    # configure logger, disable logging in child MPI processes (with rank > 0)
    arg_parser = common_arg_parser()
    arg_parser.add_argument('--options_play', help='Agent play with options',
                            default=False, action='store_true')
    arg_parser.add_argument(
        '--selective_option_play', default=False, action='store_true',
        help='Agent play with selective option')
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = {k: parse(v) for k, v in parse_unknown_args(unknown_args).items()}

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, _ = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        env = build_env(args)
        obs = env.reset()
        w, h, _ = obs[0].shape
        video = MovieWriter(osp.join(logger.get_dir(), "play.mp4"), (w, h), 2)
        video.add_frame(np.array(obs[0][:, :, ::-1], dtype=np.uint8))
        import cv2
        cv2.imwrite(osp.join(logger.get_dir(), "init_ob.png"),
                    obs[0][:, :, ::-1])
        i = 0
        while i < 1000:
            i += 1
            actions = model.step(obs, stochastic=True)[0]
            obs, _, done, _ = env.step(actions)
            # env.render()
            video.add_frame(np.array(obs[0][:, :, ::-1], dtype=np.uint8))
            # done = done.any() if isinstance(done, np.ndarray) else done
            done = done[0]

            if done:
                obs = env.reset()
                env.close()
                video.close()
                break

    if args.selective_option_play:
        logger.log("Running selective option play with trained options policy")
        env = build_env(args)
        obs = env.reset()
        w, h, _ = obs[0].shape
        video = MovieWriter(
            osp.join(logger.get_dir(), "selective_option_play.mp4"), (w, h), 2)
        video.add_frame(np.array(obs[0][:, :, ::-1], dtype=np.uint8))
        import cv2
        cv2.imwrite(osp.join(logger.get_dir(), "init_ob.png"),
                    obs[0][:, :, ::-1])
        i = 0
        while i < 1000:
            i += 1
            actions = model.selective_option_step(obs, stochastic=True)[0]
            obs, _, done, _ = env.step(actions)
            # env.render()
            video.add_frame(np.array(obs[0][:, :, ::-1], dtype=np.uint8))
            # done = done.any() if isinstance(done, np.ndarray) else done
            done = done[0]

            if done:
                obs = env.reset()
                env.close()
                video.close()
                break

    if args.options_play:
        logger.log("Running trained options policy")
        video_path = osp.join(logger.get_dir(), "options_play")
        if not osp.exists(video_path):
            os.mkdir(video_path)

        # assume 64 options
        for i in range(64):
            env = build_env(args)
            obs = env.reset()
            w, h, _ = obs[0].shape
            logger.log("Create op_play_{}.mp4".format(i))
            video = MovieWriter(
                osp.join(video_path, "op_play_{}.mp4".format(i)), (w, h), 2)
            video.add_frame(np.array(obs[0][:, :, ::-1], dtype=np.uint8))
            option_z = np.zeros((env.num_envs, 64))
            option_z[:, i] = 1.0

            step = 1
            while step < 1000:
                actions = model.option_step(option_z, obs, stochastic=True)[0]
                discri = model.option_select(obs)[0]
                logger.log("step: {} discriminator: {}".format(step, discri))
                obs, _, done, _ = env.step(actions)
                video.add_frame(np.array(obs[0][:, :, ::-1], dtype=np.uint8))
                done = done[0]

                step += 1

                if done:
                    obs = env.reset()
                    env.close()
                    video.close()
                    break
Example #10
def main(args=None):
    # configure logger, disable logging in child MPI processes (with rank > 0)

    if args is None:
        from thesis_galljamov18.python.training.guro_train import LOAD_MODEL
        arg_parser = common_arg_parser()
        args, unknown_args = arg_parser.parse_known_args()
        extra_args = {
            k: parse(v)
            for k, v in parse_unknown_args(unknown_args).items()
        }
        """All args: {'nsteps': 2048, 'nminibatches': 32, 'lam': 0.95, 'gamma': 0.99, 'noptepochs': 10, 'log_interval': 1, 
        'ent_coef': 0.0, 'lr': <function mujoco.<locals>.<lambda> at 0x7f8f5af49f28>, 'cliprange': 0.2, 'value_network': 'copy'}"""

        # train my environment instead default one
        args.env = "Guro-v0"
        args.num_timesteps = 0 if LOAD_MODEL else 10e6 + 1e5
        args.play = LOAD_MODEL
        args.alg = 'ppo2'
        args.network = 'mlp'

        # change further arguments
        # nsteps = 2048
        # nminibatches = 32
        # gamma = 0.95
        # lr = 0.001
        # cliprange = 0.2

        # extra_args.update({'nsteps': nsteps, 'nminibatches': nminibatches, 'gamma': gamma, 'cliprange': cliprange})
        # extra_args.update({'lr': 1e-10})
    else:
        extra_args = {}

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, _ = train(args, extra_args)

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("\n------------\nRunning trained model\n------------\n")

        def say(text):
            os.system(
                'spd-say "{}" --volume -1 --voice-type male2'.format(text))

        # say("Attention please! Running trained model in 10 seconds!")
        # import time
        # time.sleep(10)
        env = build_env(args)
        obs = env.reset()
        #env.ob_rms.mean = [0,0,0,0,0,0] #[0., 0.39362465587763634, 0., -0.11370739423088674, 0.01929697539211253, 0.5066570016460371]
        # [ 0,         0.46073392,  0,          0.20411958, -0.05412459,  0.49079091]
        #         print("\n----------\nOBSERV_MEANS of loaded model: " + str(env.ob_rms.mean) + "\n----------\n")
        # exit(33)
        while True:
            actions = model.step(obs)[0]
            obs, _, done, _ = env.step(actions)
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done

            if done:
                obs = env.reset()
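Because this variant accepts a prebuilt namespace, it can also be driven from other code without touching the CLI. A hedged sketch (the attribute names follow common_arg_parser; anything train() reads beyond these would still need to be set):

import argparse

args = argparse.Namespace(env='Guro-v0', alg='ppo2', network='mlp',
                          num_timesteps=1e6, num_env=1, seed=None,
                          save_path=None, play=False)
main(args)  # on this path extra_args falls back to {}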