# Example #1
# 0
def main():
    """Entry point: set up a TF session, build the A2C agent (with VAE
    observations enabled), then run training and/or testing per the args.
    """
    # Read run configuration from the JSON args file.
    run_args = parse_args()

    tf.reset_default_graph()

    # One op-parallelism thread per environment worker.
    tf_config = tf.ConfigProto(
        allow_soft_placement=True,
        intra_op_parallelism_threads=run_args.num_envs,
        inter_op_parallelism_threads=run_args.num_envs)
    tf_config.gpu_options.allow_growth = True
    session = tf.Session(config=tf_config)

    # Create/resolve every experiment output directory; the resolved
    # paths are written back onto the args object.
    (run_args.experiment_dir, run_args.summary_dir, run_args.checkpoint_dir,
     run_args.output_dir, run_args.test_dir) = create_experiment_dirs(run_args.experiment_dir)

    # Third positional argument enables the VAE observation pipeline.
    agent = A2C(session, run_args, True)

    if run_args.to_train:
        agent.train()
    if run_args.to_test:
        agent.test(total_timesteps=10000000)
def gen_data(gen_args, render=False):
    """Roll out a trained A2C policy on Breakout and save a raw dataset.

    Record format per kept step: (stacked observation, motion mask, action,
    value, done). The five lists are stacked and written to one HDF5 file
    via ``data_utils.save_lists_as_h5``.

    Args:
        gen_args: tuple ``(batch_num, postfix, max_steps, frame_skip)`` --
            ``batch_num``/``postfix`` name the output file, ``max_steps`` is
            the number of records to collect, ``frame_skip`` keeps every
            k-th environment step.
        render: when True, render the environment on each kept step.

    Returns:
        The basename of the written dataset file.
    """
    batch_num, postfix, max_steps, frame_skip = gen_args
    file_name = 'Breakout_raw_{}_{:04d}'.format(postfix, batch_num)

    observation_list = []
    obs_mask_list = []
    actions_list = []
    values_list = []
    dones_list = []

    # Build a TF session sized to the configured number of environments.
    config_args = parse_args()

    tf.reset_default_graph()

    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=config_args.num_envs,
                            inter_op_parallelism_threads=config_args.num_envs)

    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # Prepare experiment directories; resolved paths are written back
    # onto the args object.
    config_args.experiment_dir, config_args.summary_dir, config_args.checkpoint_dir, config_args.output_dir, config_args.test_dir = \
        create_experiment_dirs(config_args.experiment_dir)

    a2c = A2C(sess, config_args)

    # Restore the env spaces that were pickled at training time.
    with open(a2c.args.experiment_dir + a2c.args.env_name + '.pkl', 'rb') as f:
        observation_space_shape, action_space_n = pickle.load(f)

    # Single environment for data generation.
    env = a2c.make_all_environments(num_envs=1,
                                    env_class=a2c.env_class,
                                    env_name=a2c.args.env_name,
                                    seed=a2c.args.env_seed)

    a2c.model.build(observation_space_shape, action_space_n)

    a2c.trainer._init_model()
    a2c.trainer._load_model()

    states = a2c.trainer.model.step_policy.initial_state

    dones = [False for _ in range(env.num_envs)]

    # Stacked frame buffer: (num_envs, H, W, num_classes * num_stack), uint8.
    observation_s = np.zeros(
        (env.num_envs, a2c.trainer.model.img_height,
         a2c.trainer.model.img_width,
         a2c.trainer.model.num_classes * a2c.trainer.model.num_stack),
        dtype=np.uint8)
    observation_s = __observation_update(env.reset(), observation_s)
    mask_s = np.zeros_like(observation_s)

    i = 0
    while len(observation_list) < max_steps:

        actions, values, states = a2c.model.step_policy.step(
            observation_s, states, dones)
        observation, rewards, dones, _ = env.step(actions)

        # Zero the frame stack of any environment that just finished an
        # episode, and report progress.
        for n, done in enumerate(dones):
            if done:
                observation_s[n] *= 0
                print(file_name, i, len(observation_list), max_steps, end='\r')

        # Motion mask: positive pixel delta of the new frame against the
        # most recent stacked frame (negative deltas are clipped to zero).
        obs_mask = observation.astype(int) - observation_s[:, :, :, -1, None]
        obs_mask = obs_mask * (obs_mask > 0)

        # NOTE(review): __observation_update is assumed to return a fresh
        # shifted stack rather than mutating in place -- confirm, since the
        # appended arrays below would otherwise alias each other.
        observation_s = __observation_update(observation, observation_s)
        mask_s = __observation_update(obs_mask, mask_s)

        # Keep only every `frame_skip`-th step.
        if i % frame_skip == 0:
            observation_list.append(observation_s)
            obs_mask_list.append(mask_s)
            actions_list.append(actions)
            values_list.append(values)
            dones_list.append(dones)

            if render: env.render()

        i += 1
    print()

    data_as_lists = [
        np.vstack(observation_list),
        np.vstack(obs_mask_list),
        np.asarray(actions_list),
        np.asarray(values_list),
        np.asarray(dones_list)
    ]

    data_utils.save_lists_as_h5(file_name, data_as_lists)

    print('Saved batch: {:4}'.format(batch_num), '-', file_name)

    env.close()
    return file_name
# Example #3
# 0
def main():
    """Load a pretrained VAE, then build (and optionally train) an A2C agent
    that acts on the VAE's latent encoding of Breakout frames, and finally
    roll out the restored policy with rendering.

    NOTE(review): this function appears to run an open-ended rollout at the
    end with no saved output -- presumably a visual sanity check.
    """
    # model_name = 'breakout_discrete_BLM64_STD0_lr0.0001_LAT4096(2)_MADE1543847099'
    # model_path = 'C:\\Users\\Toke\\Dropbox\\MAI\\'

    # Hard-coded location of the pretrained VAE checkpoint.
    model_name = 'VAEModel'
    model_path = 'C:\\Users\\Vlad-PC\\Desktop\\'

    model_path += model_name

    # Latent spec: [[32 * 128, 2]] -- presumably [num_categories, dims] for a
    # discrete latent; TODO confirm against ExpParam. (This local is unused:
    # the literal is repeated inside ExpParam below.)
    latent = [[32 * 128, 2]]
    # raw_dim = (210, 160, 3)
    # net_dim = (32*4, 32*3, 3)
    raw_dim = (84, 84, 4)
    net_dim = (84, 84, 4)

    ### Do stuff
    exp_param = ExpParam(
        lat_type="discrete",
        dataset='breakout',
        latent=[[32 * 128, 2]],
        raw_type=tf.uint8,
        raw_dim=raw_dim,
        net_dim=net_dim,  # very close to the original aspect ratio
        batch_size=2,  # for testing
    )

    ### Load model -- critical_load=True: fail if the checkpoint is missing.
    sess_ae, AE, saver = create_or_load_vae(
        model_path,
        exp_param=exp_param,
        critical_load=True)


    # The A2C lives in its own graph/session, separate from the VAE's.
    graph_a2c = tf.Graph()
    with graph_a2c.as_default():
        # tf.reset_default_graph()

        config_args = parse_args()
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=config_args.num_envs,
                                inter_op_parallelism_threads=config_args.num_envs)

        config.gpu_options.allow_growth = True
        sess_a2c = tf.Session(config=config)

        # Prepare experiment directories; resolved paths are written back
        # onto the args object.
        config_args.experiment_dir, config_args.summary_dir, config_args.checkpoint_dir, config_args.output_dir, config_args.test_dir = \
            create_experiment_dirs(config_args.experiment_dir)

        # Third positional argument enables the VAE observation pipeline.
        a2c = A2C(sess_a2c, config_args, True)

        env = A2C.make_all_environments(a2c.args.num_envs, a2c.env_class, a2c.args.env_name,
                                        a2c.args.env_seed)

        print("\n\nBuilding the model...")
        if a2c.useVAE:
            # Build the policy over the latent space instead of raw pixels.
            a2c.model.buildForVAE(env.observation_space.shape, env.action_space.n, a2c.latent_size)
        print("Model is built successfully\n")

        # with open(a2c.args.experiment_dir + a2c.args.env_name + '.pkl', 'wb') as f:
        #     pickle.dump((env.observation_space.shape, env.action_space.n), f, pickle.HIGHEST_PROTOCOL)

        print('Training...')

        # training
        if a2c.args.to_train:
            a2c.trainer.trainFromVAE(env, sess_ae, AE)

        # testing: restore the env spaces pickled at training time.
        with open(a2c.args.experiment_dir + a2c.args.env_name + '.pkl', 'rb') as f:
            observation_space_shape, action_space_n = pickle.load(f)

        # Fresh single environment for the rollout below.
        env = a2c.make_all_environments(
            num_envs=1,
            env_class=a2c.env_class,
            env_name=a2c.args.env_name,
            seed=a2c.args.env_seed)

        # NOTE(review): buildForVAE is called a second time here in the same
        # graph -- confirm this is intentional and not redundant.
        a2c.model.buildForVAE(observation_space_shape, action_space_n, a2c.latent_size)

        a2c.trainer._init_model()
        a2c.trainer._load_model()

        states = a2c.trainer.model.step_policy.initial_state

        dones = [False for _ in range(env.num_envs)]

        # Stacked frame buffer: (num_envs, H, W, num_classes * num_stack), uint8.
        observation_s = np.zeros(
            (env.num_envs, a2c.trainer.model.img_height, a2c.trainer.model.img_width,
             a2c.trainer.model.num_classes * a2c.trainer.model.num_stack),
            dtype=np.uint8)

        observation = env.reset()
        observation_s = __observation_update(observation, observation_s)

        i = 0
        max_steps = 1e3
        while i < max_steps:
            i += 1
            # Encode raw stacked frames into the VAE latent space, then
            # step the policy on the latent observation.
            observation_z = encode_data(AE, sess_ae, observation_s)

            ## TODO: Change a2c.model.step_policy.step
            actions, values, states = a2c.model.step_policy.step(observation_z, states, dones)

            observation, rewards, dones, _ = env.step(actions)

            # Zero the frame stack of any environment that just finished.
            for n, done in enumerate(dones):
                if done:
                    observation_s[n] *= 0
                    # print(file_name, i, len(observation_list), max_steps, end='\r')
                    # print(batch_num, len(observation_list), max_steps)

            # print()
            env.render()