Ejemplo n.º 1
0
    def __init__(self, load_model=True):
        """Set up the VAE, the MDN-RNN and a randomly initialised controller.

        Args:
            load_model: if true, restore the VAE and RNN weights from
                'vae/vae.json' and 'rnn/rnn.json'.
        """
        # Environment executable (alternative build: './VisualPushBlock.x86_64').
        self.env_name = './VisualPushBlock_withBlock_z_info.x86_64'

        vae_kwargs = dict(batch_size=1,
                          gpu_mode=False,
                          is_training=False,
                          reuse=True)
        self.vae = ConvVAE(**vae_kwargs)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        n_inputs = rnn_output_size(EXP_MODE)
        self.input_size = n_inputs
        self.z_size = z_size

        # Controller parameters. The random draws are kept in a fixed order
        # (weights before biases, hidden layer before output layer).
        if EXP_MODE == MODE_Z_HIDDEN:
            # One hidden layer between the RNN features and the actions.
            n_hidden = 40
            self.hidden_size = n_hidden
            self.weight_hidden = np.random.randn(n_inputs, n_hidden)
            self.bias_hidden = np.random.randn(n_hidden)
            self.weight_output = np.random.randn(n_hidden, ACTION_SIZE)
            self.bias_output = np.random.randn(ACTION_SIZE)
            hidden_params = (n_inputs + 1) * n_hidden
            output_params = (n_hidden + 1) * ACTION_SIZE
            self.param_count = hidden_params + output_params
        else:
            # Single linear layer.
            self.weight = np.random.randn(n_inputs, ACTION_SIZE)
            self.bias = np.random.randn(ACTION_SIZE)
            self.param_count = (n_inputs + 1) * ACTION_SIZE

        self.render_mode = False
Ejemplo n.º 2
0
    def __init__(self, arglist):
        """Build the world model and a random controller with two outputs.

        Args:
            arglist: object providing `game` (stored as the environment
                name), `vae_file` and `rnn_file` (JSON weight files).
        """
        self.env_name = arglist.game

        # Each network is restored right after it has been built.
        self.vae = ConvVAE(batch_size=1, gpu_mode=False,
                           is_training=False, reuse=True)
        self.vae.load_json(arglist.vae_file)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.rnn.load_json(arglist.rnn_file)

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        n_inputs = rnn_output_size(EXP_MODE)
        n_actions = 2
        self.input_size = n_inputs
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:
            # One hidden layer of 40 units.
            n_hidden = 40
            self.hidden_size = n_hidden
            self.weight_hidden = np.random.randn(n_inputs, n_hidden)
            self.bias_hidden = np.random.randn(n_hidden)
            self.weight_output = np.random.randn(n_hidden, n_actions)
            self.bias_output = np.random.randn(n_actions)
            self.param_count = ((n_inputs + 1) * n_hidden +
                                (n_hidden + 1) * n_actions)
        else:
            # Single linear layer.
            self.weight = np.random.randn(n_inputs, n_actions)
            self.bias = np.random.randn(n_actions)
            self.param_count = (n_inputs + 1) * n_actions

        self.render_mode = False
Ejemplo n.º 3
0
  def __init__(self, load_model=True):
    """Create the VAE, the MDN-RNN and a random controller with three outputs.

    Args:
      load_model: if true, restore weights from 'vae/vae.json' and
        'rnn/rnn.json'.
    """
    # Linux build of the environment. The macOS path noted by the author was
    # "/Users/intuinno/codegit/pushBlock/app/mac/VisualPushBlockContinuous".
    self.env_name = "/home/intuinno/codegit/pushblock/app/linux/pushblock.x86_64"

    vae_kwargs = dict(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.vae = ConvVAE(**vae_kwargs)
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

    if load_model:
      self.vae.load_json('vae/vae.json')
      self.rnn.load_json('rnn/rnn.json')

    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True

    n_inputs = rnn_output_size(EXP_MODE)
    n_actions = 3
    self.input_size = n_inputs
    self.z_size = 32

    if EXP_MODE == MODE_Z_HIDDEN:
      # One hidden layer of 40 units.
      n_hidden = 40
      self.hidden_size = n_hidden
      self.weight_hidden = np.random.randn(n_inputs, n_hidden)
      self.bias_hidden = np.random.randn(n_hidden)
      self.weight_output = np.random.randn(n_hidden, n_actions)
      self.bias_output = np.random.randn(n_actions)
      self.param_count = (n_inputs + 1) * n_hidden + (n_hidden + 1) * n_actions
    else:
      # Single linear layer.
      self.weight = np.random.randn(n_inputs, n_actions)
      self.bias = np.random.randn(n_actions)
      self.param_count = (n_inputs + 1) * n_actions

    self.render_mode = False
Ejemplo n.º 4
0
  def __init__(self):
    """Load the saved VAE and MDN-RNN and create a random 3-output controller."""
    self.env_name = "carracing"

    # Each network is restored from its JSON file right after construction.
    vae_kwargs = dict(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.vae = ConvVAE(**vae_kwargs)
    self.vae.load_json('vae/vae.json')
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    self.rnn.load_json('rnn/rnn.json')

    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True

    n_inputs = rnn_output_size(EXP_MODE)
    n_actions = 3
    self.input_size = n_inputs
    self.z_size = 32

    if EXP_MODE == MODE_Z_HIDDEN:
      # One hidden layer of 40 units.
      n_hidden = 40
      self.hidden_size = n_hidden
      self.weight_hidden = np.random.randn(n_inputs, n_hidden)
      self.bias_hidden = np.random.randn(n_hidden)
      self.weight_output = np.random.randn(n_hidden, n_actions)
      self.bias_output = np.random.randn(n_actions)
      self.param_count = (n_inputs + 1) * n_hidden + (n_hidden + 1) * n_actions
    else:
      # Single linear layer.
      self.weight = np.random.randn(n_inputs, n_actions)
      self.bias = np.random.randn(n_actions)
      self.param_count = (n_inputs + 1) * n_actions

    self.render_mode = False
Ejemplo n.º 5
0
    def __init__(self, arglist, action_space, scope, load_model=True):
        """Build the world model, opponent model and a random controller.

        Args:
            arglist: settings object; this constructor reads `action_space`,
                `agent_num`, `timestep`, `vae_model_dir`, `rnn_model_dir`
                and `inference` from it.
            action_space: number of controller outputs.
            scope: suffix used to name the opponent model
                ("oppo_model_<scope>").
            load_model: if true, restore the VAE and RNN weights from the
                paths given in `arglist`.
        """
        self.action_space = action_space
        self.arglist = arglist
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)

        # RNN input width: 32 latent values, the agent's own action, plus
        # `timestep` actions for each of the other agents.
        hps_sample = hps_model._replace(
            batch_size=1,
            input_seq_width=32 + arglist.action_space +
            (arglist.agent_num - 1) * arglist.action_space * arglist.timestep,
            max_seq_len=1,
            use_recurrent_dropout=0,
            is_training=0)

        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json(arglist.vae_model_dir)
            self.rnn.load_json(arglist.rnn_model_dir)

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        if arglist.inference:
            self.input_size = rnn_output_size(
                EXP_MODE) + (arglist.agent_num - 1) * arglist.action_space
        else:
            self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        # Action trajectory recording: one zero-filled deque per other agent.
        # A comprehension is required here; `[deque(...)] * k` would repeat a
        # single deque object, so every agent would share the same buffer.
        self.act_traj = [
            collections.deque(
                np.zeros((arglist.timestep, arglist.action_space)),
                maxlen=arglist.timestep)
            for _ in range(arglist.agent_num - 1)
        ]
        self.oppo_model = Oppo_Model(arglist.agent_num, arglist.timestep,
                                     arglist.action_space,
                                     arglist.action_space,
                                     "oppo_model_{}".format(scope))
        self.inference = arglist.inference

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size,
                                                 self.action_space)
            self.bias_output = np.random.randn(self.action_space)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                self.hidden_size * self.action_space + self.action_space)
        else:  # single linear layer
            self.weight = np.random.randn(self.input_size, self.action_space)
            self.bias = np.random.randn(self.action_space)
            self.param_count = (
                self.input_size) * self.action_space + self.action_space
Ejemplo n.º 6
0
 def __init__(self, full_episode=False):
   """Initialise the environment and load the VAE matching its latent size.

   Args:
     full_episode: stored on the instance as `full_episode`.
   """
   super(VAERacing, self).__init__()
   self._internal_counter = 0

   # The latent size comes from the 'vae_racing' game entry and also selects
   # which weight file is loaded.
   latent_size = games['vae_racing'].input_size
   self.z_size = latent_size
   vae_kwargs = dict(batch_size=1, z_size=latent_size, gpu_mode=False,
                     is_training=False, reuse=True)
   self.vae = ConvVAE(**vae_kwargs)
   self.vae.load_json('vae/vae_{}.json'.format(latent_size))

   self.full_episode = full_episode

   # Unbounded observation space with one entry per latent dimension.
   bound = np.array([np.inf] * latent_size)
   self.observation_space = Box(-bound, bound)

   self._has_rendered = False
   self.real_frame = None
Ejemplo n.º 7
0
    def __init__(self,
            env,
            batchsize=64,
            input_size=(64,64),
            num_frame_stack=4,
            gamma=0.95,
            frame_skip=1,
            train_freq=4,
            initial_epsilon=1.0,
            min_epsilon=0.1,
            render=True,
            epsilon_decay_steps=int(1e6),
            min_experience_size=int(1e3),
            experience_capacity=int(1e5),
            network_update_freq=5000,
            regularization=1e-6,
            optimizer_params=None,
            action_map=None
        ):
        """Load the VAE and store the agent's hyper-parameters.

        Every argument except `experience_capacity` is copied onto the
        instance under the same name (`experience_capacity` is not used in
        the visible part of this constructor).

        Args:
            env: environment; `env.action_space.n` gives the number of
                actions when `action_map` is None.
            action_map: optional collection whose length gives the number
                of actions.
            optimizer_params: optimizer settings; a falsy value selects
                learning_rate=0.0004 and epsilon=1e-7.
        """
        self.vae = ConvVAE(batch_size=batchsize, gpu_mode=False,
                           is_training=False, reuse=True)
        self.vae.load_json('vae/vae.json')

        # Number of actions: from the action map if given, else from the env.
        self.dim_actions = (env.action_space.n
                            if action_map is None else len(action_map))

        # Environment and action handling.
        self.network_update_freq = network_update_freq
        self.action_map = action_map
        self.env = env

        # Training hyper-parameters.
        self.batchsize = batchsize
        self.num_frame_stack = num_frame_stack
        self.gamma = gamma
        self.frame_skip = frame_skip
        self.train_freq = train_freq

        # Exploration schedule.
        self.initial_epsilon = initial_epsilon
        self.min_epsilon = min_epsilon
        self.epsilon_decay_steps = epsilon_decay_steps

        self.render = render
        self.min_experience_size = min_experience_size
        self.input_size = input_size
        self.regularization = regularization

        if not optimizer_params:
            optimizer_params = {'learning_rate': 0.0004, 'epsilon': 1e-7}
        self.optimizer_params = optimizer_params

        # Run-time state.
        self.do_training = True
        self.playing_epsilon = 0.0
        self.session = None
        self.state_size = (num_frame_stack,) + input_size
        self.global_counter = 0
        self.episode_counter = 0
Ejemplo n.º 8
0
 def __init__(self, full_episode=False, discrete_mode=False):
     """Initialise the environment and load the frame-stack VAE.

     Args:
         full_episode: stored on the instance as `full_episode`.
         discrete_mode: stored on the instance as `discrete_mode`.
     """
     super(VAERacingStack, self).__init__()
     self._internal_counter = 0

     latent_size = games['vae_racing_stack'].input_size
     self.z_size = latent_size

     # The VAE takes FRAME_STACK channels; the weight file name carries the
     # same number.
     vae_kwargs = dict(batch_size=1,
                       z_size=latent_size,
                       num_channel=FRAME_STACK,
                       gpu_mode=False,
                       is_training=False,
                       reuse=True)
     self.vae = ConvVAE(**vae_kwargs)
     self.vae.load_json('vae/vae_stack_{}.json'.format(FRAME_STACK))

     self.full_episode = full_episode

     # Unbounded observation space with one entry per latent dimension.
     bound = np.array([np.inf] * latent_size)
     self.observation_space = Box(-bound, bound)

     self.cumulative_frames = None
     self._has_rendered = False
     self.discrete_mode = discrete_mode
Ejemplo n.º 9
0
    def __init__(self,
                 model_name='',
                 load_model=True,
                 load_full_model=False,
                 full_model_path=''):
        """Create the world model and a random 3-output controller.

        Args:
            model_name: prefix of the per-model weight files
                ('<model_name>_vae.json' / '<model_name>_rnn.json').
            load_model: if true (and `load_full_model` is false), load the
                prefixed files from `vae_path` and `rnn_path`.
            load_full_model: if true, load 'vae.json' and 'rnn.json' from
                `full_model_path`; takes precedence over `load_model`.
            full_model_path: directory used when `load_full_model` is true.
        """
        self.model_name = model_name
        self.env_name = "carracing"

        vae_kwargs = dict(batch_size=1,
                          gpu_mode=False,
                          is_training=False,
                          reuse=True)
        self.vae = ConvVAE(**vae_kwargs)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_full_model:
            vae_file = os.path.join(full_model_path, 'vae.json')
            self.vae.load_json(vae_file)
            rnn_file = os.path.join(full_model_path, 'rnn.json')
            self.rnn.load_json(rnn_file)
        elif load_model:
            vae_file = os.path.join(vae_path, self.model_name + '_vae.json')
            self.vae.load_json(vae_file)
            rnn_file = os.path.join(rnn_path, self.model_name + '_rnn.json')
            self.rnn.load_json(rnn_file)

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        n_inputs = rnn_output_size(EXP_MODE)
        n_actions = 3
        self.input_size = n_inputs
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:
            # One hidden layer of 40 units.
            n_hidden = 40
            self.hidden_size = n_hidden
            self.weight_hidden = np.random.randn(n_inputs, n_hidden)
            self.bias_hidden = np.random.randn(n_hidden)
            self.weight_output = np.random.randn(n_hidden, n_actions)
            self.bias_output = np.random.randn(n_actions)
            self.param_count = ((n_inputs + 1) * n_hidden +
                                (n_hidden + 1) * n_actions)
        else:
            # Single linear layer.
            self.weight = np.random.randn(n_inputs, n_actions)
            self.bias = np.random.randn(n_actions)
            self.param_count = (n_inputs + 1) * n_actions

        self.render_mode = False
Ejemplo n.º 10
0
 def __init__(self, type="CarRacing", history_pick=4, seed=None, detect_edges=False, detect_grass=False, flip=False):
     """Create the CarRacing environment wrapper and load the world model.

     Args:
         type: name prefix; the current time is appended to form `name`.
         history_pick: number of frames in the state history.
         seed: stored as `self.seed` (not used to seed the environment
             created here).
         detect_edges: stored on the instance.
         detect_grass: stored on the instance.
         flip: stored on the instance.
     """
     self.name = type + str(time.time())

     # The global RNG is seeded with a constant, so the environment seed
     # drawn right after it is the same on every run.
     random.seed(30)
     env_seed = random.randint(1, 10000000)
     self.env = make_env('CarRacing-v0', env_seed, render_mode=False, full_episode=True)

     frame_dims = [64, 64]
     self.image_dimension = frame_dims
     self.history_pick = history_pick
     self.state_space_size = history_pick * np.prod(frame_dims)
     self.action_space_size = 5
     self.state_shape = [None, history_pick] + list(frame_dims)
     self.history = []

     # Discrete action index -> three-component action vector.
     self.action_dict = {
         0: [-1, 0, 0],
         1: [1, 0, 0],
         2: [0, 1, 0],
         3: [0, 0, 0.8],
         4: [0, 0, 0],
     }

     self.seed = seed
     self.detect_edges = detect_edges
     self.detect_grass = detect_grass
     self.flip = flip
     self.flip_episode = False

     # Both networks are built first and restored afterwards.
     self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
     self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
     self.vae.load_json('vae/vae.json')
     self.rnn.load_json('rnn/rnn.json')
Ejemplo n.º 11
0
    def __init__(self,
                 sess=None,
                 summary_writer=tf.summary.FileWriter("logs/"),
                 rl_training=False,
                 reuse=False,
                 cluster=None,
                 index=0,
                 device='/gpu:0',
                 ppo_load_path=None,
                 ppo_save_path=None,
                 load_worldmodel=True,
                 ntype='worldmodel'):
        """Store the session settings, build the world model and the graph.

        Args:
            sess: stored as `self.sess`.
            summary_writer: stored as `self.summary_writer`. The default
                `tf.summary.FileWriter("logs/")` is evaluated once, when the
                function is defined, so it is shared by every call that
                relies on the default.
            rl_training: stored as `self.rl_training`.
            reuse: stored as `self.reuse`.
            cluster: stored as `self.cluster`.
            index: stored as `self.index`.
            device: stored as `self.device`.
            ppo_load_path: prefix concatenated with `ntype` to form
                `policy_model_path_load`.
            ppo_save_path: prefix concatenated with `ntype` to form
                `policy_model_path_save`.
            load_worldmodel: if true, load 'vae/vae.json' and 'rnn/rnn.json'.
            ntype: suffix appended to both policy model paths.

        Raises:
            TypeError: if `ppo_load_path` or `ppo_save_path` is left at its
                default of None, because None cannot be concatenated with
                `ntype`.
        """
        # NOTE(review): both paths default to None, which makes the two
        # concatenations below fail; callers must pass strings.
        self.policy_model_path_load = ppo_load_path + ntype
        self.policy_model_path_save = ppo_save_path + ntype

        self.rl_training = rl_training

        self.use_norm = True

        self.reuse = reuse
        self.sess = sess
        self.cluster = cluster
        self.index = index
        self.device = device

        # World model: VAE encoder plus MDN-RNN, both built for batch size 1
        # (hps_sample is defined elsewhere in the file).
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_worldmodel:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.input_size = rnn_output_size(EXP_MODE)

        self._create_graph()

        # Created after _create_graph(); presumably so the saver covers the
        # variables that call defines -- TODO confirm.
        self.rl_saver = tf.train.Saver()
        self.summary_writer = summary_writer
Ejemplo n.º 12
0
    def __init__(self, load_model=True):
        """Create the Pong environment, the world model and a linear controller.

        Args:
            load_model: if true, restore the VAE and RNN weights from
                'vae/vae.json' and 'rnn/rnn.json'.

        Raises:
            NotImplementedError: if EXP_MODE is MODE_Z_HIDDEN; the
                hidden-layer controller is not ported for atari.
        """
        # Fail before creating the environment or any network: the original
        # code raised in this mode only after all of that work was done, and
        # the statements following the raise could never run.
        if EXP_MODE == MODE_Z_HIDDEN:
            raise NotImplementedError("not ported for atari")

        self.env_name = "Pong"
        self._make_env()

        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)

        # self.num_actions is presumably set by _make_env() above -- verify.
        hps_sample_dynamic = hps_sample._replace(num_actions=self.num_actions)
        self.rnn = MDNRNN(hps_sample_dynamic, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        # Single linear layer; its width depends on the number of actions
        # (original TODO: not known until env.action_space is queried).
        self.weight = np.random.randn(self.input_size, self.num_actions)
        self.bias = np.random.randn(self.num_actions)
        self.param_count = (self.input_size + 1) * self.num_actions

        self.render_mode = False
Ejemplo n.º 13
0
    def __init__(self, load_model=True, env_name="Pong-v0", render_mode=False):
        """Create the environment, the world model and the controller.

        Args:
            load_model: if true, restore the VAE and RNN weights from
                'vae/vae.json' and 'rnn/rnn.json'.
            env_name: stored as `self.env_name` before `make_env()` runs.
            render_mode: stored as `self.render_mode`. The original code
                accepted this argument but always stored False.
        """
        self.env_name = env_name
        self.make_env()
        self.z_size = 32

        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        # RNN input width is the latent size plus self.na
        # (presumably the number of actions, set by make_env() -- verify).
        hps_atari = hps_sample._replace(input_seq_width=self.z_size + self.na)
        self.rnn = MDNRNN(hps_atari, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.init_controller()

        # Honour the caller's choice; the default (False) matches the value
        # that used to be hard-coded here.
        self.render_mode = render_mode
Ejemplo n.º 14
0
def sample_vae2(args):
    """Decode random latent vectors with a saved ConvVAE and display them.

    For vae from https://github.com/hardmaru/WorldModelsExperiments.git

    Args:
        args: object whose `count` attribute gives the number of samples
            (also used as the VAE batch size).
    """
    latent_size = 32
    n = args.count

    reset_graph()
    vae = ConvVAE(z_size=latent_size,
                  batch_size=n,
                  learning_rate=0.0001,
                  kl_tolerance=0.5,
                  is_training=False,
                  reuse=False,
                  gpu_mode=False)
    vae.load_json(os.path.join("tf_vae", 'vae.json'))

    # One standard-normal latent vector per requested sample.
    latents = np.random.normal(size=(n, latent_size))
    samples = vae.decode(latents)
    image_dims = samples.shape[1:]

    plt.figure(figsize=(20, 4))
    plt.title('VAE samples')
    for cell in range(1, n + 1):
        # Only the first row of the 2 x n grid is filled.
        ax = plt.subplot(2, n, cell)
        picture = samples[cell - 1]
        plt.imshow(picture.reshape(image_dims[0], image_dims[1],
                                   image_dims[2]))
        for axis in (ax.get_xaxis(), ax.get_yaxis()):
            axis.set_visible(False)

    plt.show()
Ejemplo n.º 15
0
 def __init__(self, full_episode=False, pure_world=False):
     """Initialise the environment, its VAE and the learned world model.

     Args:
         full_episode: stored on the instance as `full_episode`.
         pure_world: if true the observation space has 10 entries,
             otherwise `z_size + 10`; also stored as `pure_world_mode`.
     """
     super(VAERacingWorld, self).__init__()
     self._internal_counter = 0

     latent_size = games['vae_racing'].input_size
     self.z_size = latent_size
     vae_kwargs = dict(batch_size=1,
                       z_size=latent_size,
                       gpu_mode=False,
                       is_training=False,
                       reuse=True)
     self.vae = ConvVAE(**vae_kwargs)
     self.vae.load_json('vae/vae_{}.json'.format(latent_size))

     self.full_episode = full_episode

     # Unbounded observation space; its width depends on pure_world.
     obs_width = 10 if pure_world else latent_size + 10
     bound = np.array([np.inf] * obs_width)
     self.observation_space = Box(-bound, bound)

     self._has_rendered = False
     self.real_frame = None

     # World model restored from a fixed log file.
     self.world_model = SimpleWorldModel(obs_size=16,
                                         action_size=3,
                                         hidden_size=10)
     self.world_model.load_model("./log/learn_vae_racing.cma.4.64.best.json")
     self.pure_world_mode = pure_world
Ejemplo n.º 16
0
# Encode recorded episodes with a trained VAE, collecting the per-episode
# mu and logvar arrays as float16.
batch_size=1000 # treat every episode as a batch of 1000!
learning_rate=0.0001
kl_tolerance=0.5

# Use at most the first 10000 files of DATA_DIR, in sorted order.
filelist = os.listdir(DATA_DIR)
filelist.sort()
filelist = filelist[0:10000]

dataset, action_dataset = load_raw_data_list(filelist)

reset_graph()

vae = ConvVAE(z_size=z_size,
              batch_size=batch_size,
              learning_rate=learning_rate,
              kl_tolerance=kl_tolerance,
              is_training=False,
              reuse=False,
              gpu_mode=False) # NOTE(review): runs on CPU; the original note said a GPU is much faster at batch size 1000

vae.load_json(os.path.join(model_path_name, 'vae.json'))
mu_dataset = []
logvar_dataset = []
# The last episode is dropped before encoding.
dataset = dataset[:-1]
for i in range(len(dataset)):
  data_batch = dataset[i]
  # Episodes that do not have exactly 1000 frames are skipped.
  if len(data_batch)!=1000:
    continue
  # z is returned by encode_batch but not stored here.
  mu, logvar, z = encode_batch(data_batch)
  mu_dataset.append(mu.astype(np.float16))
  logvar_dataset.append(logvar.astype(np.float16))
Ejemplo n.º 17
0
    # Make sure the output directory for the encoded series exists.
    if not os.path.exists(arglist.series_dir):
        os.makedirs(arglist.series_dir)

    # Use at most the first 10000 files of the data directory, in sorted order.
    filelist = os.listdir(arglist.data_dir)
    filelist.sort()
    filelist = filelist[0:10000]

    dataset, action_dataset, oppo_action_dataset = load_raw_data_list(
        filelist, arglist)

    reset_graph()
    # The VAE is only built and loaded when arglist.use_vae is set; `vae` is
    # undefined otherwise.
    if arglist.use_vae:
        vae = ConvVAE(
            z_size=arglist.z_size,
            batch_size=arglist.batch_size,
            learning_rate=arglist.lr,
            kl_tolerance=arglist.kl_tolerance,
            is_training=False,
            reuse=False,
            gpu_mode=True)  # use GPU on batchsize of 1000 -> much faster

        vae.load_json(os.path.join(arglist.vae_path, 'vae.json'))

    mu_dataset = []
    logvar_dataset = []
    action_dataset_real = []
    oppo_action_dataset_real = []
    for i in range(len(dataset)):
        data_batch = dataset[i]
        # Episodes no longer than one batch are skipped.
        if len(data_batch) <= arglist.batch_size:
            continue
        else:
Ejemplo n.º 18
0
    # Build the training set either from image files or from recorded data.
    if arglist.use_image:
      dataset = create_dataset_with_image(filelist, arglist.data_dir)
    else:
      dataset = create_dataset(filelist, arglist)

    # split into batches:
    total_length = len(dataset)
    num_batches = int(np.floor(total_length/arglist.batch_size))
    print("num_batches", num_batches)

    reset_graph()

    vae = ConvVAE(z_size=arglist.z_size,
                  batch_size=arglist.batch_size,
                  learning_rate=arglist.lr,
                  kl_tolerance=arglist.kl_tolerance,
                  is_training=True,
                  reuse=False,
                  gpu_mode=True)

    # train loop:
    print("train", "step", "loss", "recon_loss", "kl_loss")
    for epoch in range(arglist.epoch):
      # The dataset is reshuffled in place at the start of every epoch.
      np.random.shuffle(dataset)
      for idx in range(num_batches):
        batch = dataset[idx*arglist.batch_size:(idx+1)*arglist.batch_size]

        # Scale pixel values by 1/255. np.float64 replaces the np.float
        # alias, which was removed in NumPy 1.24 (same dtype).
        obs = np.array(batch).astype(np.float64)/255.0

        feed = {vae.x: obs,}
Ejemplo n.º 19
0
# Use at most the first 10000 files, in sorted order.
filelist.sort()
filelist = filelist[0:10000]
#print("check total number of images:", count_length_of_filelist(filelist))
dataset = create_dataset(filelist)

# split into batches:
total_length = len(dataset)
num_batches = int(np.floor(total_length/batch_size))
print("num_batches", num_batches)

reset_graph()

vae = ConvVAE(z_size=z_size,
              batch_size=batch_size,
              learning_rate=learning_rate,
              kl_tolerance=kl_tolerance,
              is_training=True,
              reuse=False,
              gpu_mode=True)

# train loop:
print("train", "step", "loss", "recon_loss", "kl_loss")
for epoch in range(NUM_EPOCH):
  # The dataset is reshuffled in place at the start of every epoch.
  np.random.shuffle(dataset)
  for idx in range(num_batches):
    batch = dataset[idx*batch_size:(idx+1)*batch_size]

    # np.float64 replaces the np.float alias, which was removed in
    # NumPy 1.24 (same dtype). No rescaling is applied here.
    obs = batch.astype(np.float64)

    feed = {vae.x: obs,}
Ejemplo n.º 20
0
# Reconstruct every frame of one randomly chosen episode with a trained VAE
# and write the original/reconstruction pairs as PNG files.
output_dir = "vae_test_result"

z_size=32

# Candidate episodes: directory entries whose name contains '.npz'.
filelist = os.listdir(DATA_DIR)
filelist = [f for f in filelist if '.npz' in f]

# Load the "obs" array of one random episode, append a trailing axis and
# scale the values by 1/255.
obs = np.load(os.path.join(DATA_DIR, random.choice(filelist)))["obs"]
obs = np.expand_dims(obs, axis=-1)
obs = obs.astype(np.float32)/255.0

n = len(obs)

vae = ConvVAE(z_size=z_size,
              batch_size=1,
              is_training=False,
              reuse=False,
              gpu_mode=False)

vae.load_json(os.path.join(model_path_name, 'vae.json'))

if not os.path.exists(output_dir):
    os.mkdir(output_dir)

print(n, "images loaded")
for i in range(n):
    # One frame at a time, reshaped to a batch of a single 64x64x1 image.
    frame = obs[i].reshape(1, 64, 64, 1)
    batch_z = vae.encode(frame)
    reconstruct = vae.decode(batch_z)
    # Both images are scaled back by 255 before saving.
    imsave(output_dir+'/%s.png' % pad_num(i), 255.*frame[0].reshape(64, 64))
    imsave(output_dir+'/%s_vae.png' % pad_num(i), 255.*reconstruct[0].reshape(64, 64))
Ejemplo n.º 21
0
def main( dirs, z_size=32, batch_size=100, learning_rate=0.0001, kl_tolerance=0.5, epochs=100, save_model=False, verbose=True, optimizer="Adam" ):
    """Train a ConvVAE on images from `dirs` and return the best batch loss.

    Args:
        dirs: passed to DriveDataGenerator as the data source.
        z_size: latent size of the VAE.
        batch_size: batch size for both the generator and the VAE.
        learning_rate: VAE learning rate.
        kl_tolerance: VAE KL tolerance.
        epochs: maximum number of epochs.
        save_model: if true, write "tf_vae/vae.json" whenever the step count
            reaches a multiple of 5000 and once more at the end.
        verbose: if true, print a header and one line per epoch.
        optimizer: optimizer name passed to ConvVAE.

    Returns:
        The lowest training loss seen on any batch (sys.maxsize if no batch
        was processed because `epochs` is 0).

    Raises:
        ValueError: if the data generator yields no batches.
    """
    model_save_path = "tf_vae"
    model_file = os.path.join(model_save_path, "vae.json")
    if save_model and not os.path.exists(model_save_path):
        os.makedirs(model_save_path)

    gen = DriveDataGenerator(dirs, image_size=(64,64), batch_size=batch_size, shuffle=True, max_load=10000, images_only=True )

    num_batches = len(gen)
    if num_batches == 0:
        # Without this check the per-epoch reporting below would fail with a
        # NameError because train_loss would never be assigned.
        raise ValueError("DriveDataGenerator produced no batches; nothing to train on")

    reset_graph()

    vae = ConvVAE(z_size=z_size,
                  batch_size=batch_size,
                  learning_rate=learning_rate,
                  kl_tolerance=kl_tolerance,
                  is_training=True,
                  reuse=False,
                  gpu_mode=True,
                  optimizer=optimizer)

    early = EarlyStopping(monitor='loss', min_delta=0.1, patience=5, verbose=verbose, mode='auto')
    early.set_model(vae)
    early.on_train_begin()

    best_loss = sys.maxsize

    if verbose:
        print("epoch\tstep\tloss\trecon_loss\tkl_loss")
    for epoch in range(epochs):
        for idx in range(num_batches):
            batch = gen[idx]

            # Scale pixel values by 1/255. np.float64 replaces the np.float
            # alias, which was removed in NumPy 1.24 (same dtype).
            obs = batch.astype(np.float64)/255.0

            feed = {vae.x: obs,}

            (train_loss, r_loss, kl_loss, train_step, _) = vae.sess.run([
              vae.loss, vae.r_loss, vae.kl_loss, vae.global_step, vae.train_op
            ], feed)

            if train_loss < best_loss:
                best_loss = train_loss

            # Periodic checkpoint every 5000 steps.
            if save_model and (train_step + 1) % 5000 == 0:
                vae.save_json(model_file)
        if verbose:
            print("{} of {}\t{}\t{:.2f}\t{:.2f}\t{:.2f}".format( epoch, epochs, (train_step+1), train_loss, r_loss, kl_loss) )
        gen.on_epoch_end()
        # Early stopping monitors the loss of the epoch's last batch.
        early.on_epoch_end(epoch, logs={"loss": train_loss})
        # The callback presumably sets `stop_training` on the model only when
        # it fires (Keras behaviour -- verify), so the attribute may not
        # exist yet on the VAE.
        if getattr(vae, "stop_training", False):
            break
    early.on_train_end()

    # finished, final model:
    if save_model:
        vae.save_json(model_file)

    return best_loss
Ejemplo n.º 22
0
# Train the VAE on groups of `episode_num` episode files at a time.
# Use at most the first 10000 files of DATA_DIR, in sorted order; the list is
# turned into an array so it can be shuffled and sliced below.
filelist = os.listdir(DATA_DIR)
filelist.sort()
filelist = np.array(filelist[:10000])
# NOTE(review): the full dataset is loaded here, but the loop below reloads
# each group of episodes from filelist -- confirm `dataset` is still needed.
dataset = load_raw_data_list(filelist)
dataset = create_dataset(dataset)

total_episodes = len(filelist)
ep_batches = int(np.floor(total_episodes / episode_num))
print("ep_batches", ep_batches)

reset_graph()

vae = ConvVAE(z_size=z_size,
              batch_size=batch_size,
              learning_rate=learning_rate,
              kl_tolerance=kl_tolerance,
              is_training=True,
              reuse=False,
              gpu_mode=tf.test.is_gpu_available())

print("train", "step", "loss", "recon_loss", "kl_loss")
for epoch in range(NUM_EPOCH):
    # File order is reshuffled in place at the start of every epoch.
    np.random.shuffle(filelist)
    for idx in range(ep_batches):
        ep_batch_list = filelist[idx * episode_num:(idx + 1) * episode_num]

        # Load this group of episodes and work out how many training batches
        # it contains before converting it to a dataset.
        ep_batch = load_raw_data_list(ep_batch_list)
        total_length = count_length_of_raw_data(ep_batch)
        num_batches = int(np.floor(total_length / batch_size))

        ep_batch = create_dataset(ep_batch)
Ejemplo n.º 23
0
def sample_vae2(args):
    """Plot random VAE samples next to real images and their reconstructions.

    For vae from https://github.com/hardmaru/WorldModelsExperiments.git

    The figure has three rows of `args.count` images: decoded random latent
    vectors, real images from the data generator, and the VAE reconstruction
    of those real images. It is saved to "samples_vae.png" and then shown.

    Args:
        args: object providing `count` (number of images per row, also the
            VAE batch size) and `dirs` (passed to DriveDataGenerator).
    """
    z_size = 64  # This needs to match the size of the trained vae
    n = args.count
    learning_rate = 0.0001
    kl_tolerance = 0.5
    model_path_name = "tf_vae"

    reset_graph()
    vae = ConvVAE(
        z_size=z_size,
        batch_size=n,
        learning_rate=learning_rate,
        kl_tolerance=kl_tolerance,
        is_training=False,
        reuse=False,
        gpu_mode=False)

    vae.load_json(os.path.join(model_path_name, 'vae.json'))

    # Row 1 source: decode standard-normal latent vectors.
    z = np.random.normal(size=(n, z_size))
    samples = vae.decode(z)
    input_dim = samples.shape[1:]

    gen = DriveDataGenerator(args.dirs,
                             image_size=(64, 64),
                             batch_size=n,
                             shuffle=True,
                             max_load=10000,
                             images_only=True)
    # Rows 2 and 3 source: first generator batch scaled by 1/255, and its
    # reconstruction. np.float64 replaces the np.float alias, which was
    # removed in NumPy 1.24 (same dtype).
    orig = gen[0].astype(np.float64) / 255.0
    recon = vae.decode(vae.encode(orig))

    def _show(position, image, title):
        # Draw one image in cell `position` of the 3 x n grid, hide its
        # axes, and give it a title when `title` is not None.
        ax = plt.subplot(3, n, position)
        plt.imshow(image.reshape(input_dim[0], input_dim[1], input_dim[2]))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        if title is not None:
            ax.set_title(title)

    plt.figure(figsize=(20, 6), tight_layout=False)
    plt.title('VAE samples')
    # Only the first image of each row carries the row title.
    for i in range(n):
        _show(i + 1, samples[i], "Random" if i == 0 else None)

    for i in range(n):
        _show(n + i + 1, orig[i], "Real" if i == 0 else None)
        _show((2 * n) + i + 1, recon[i], "Reconstructed" if i == 0 else None)

    plt.savefig("samples_vae.png")
    plt.show()
Ejemplo n.º 24
0
from baselines.ddpg.ddpg_learner import DDPG
from baselines.ddpg.models import Actor, Critic
from baselines.ddpg.memory import Memory
from baselines.ddpg.noise import AdaptiveParamNoiseSpec, NormalActionNoise, OrnsteinUhlenbeckActionNoise
from baselines.common import set_global_seeds
import baselines.common.tf_util as U

from baselines import logger
import numpy as np

try:
    from mpi4py import MPI
except ImportError:
    MPI = None

# Module-level world model, built and restored once at import time:
# a ConvVAE and an MDN-RNN, both loaded from their JSON weight files.
# NOTE(review): ConvVAE, MDNRNN and hps_sample are not imported in the
# visible import block -- confirm they are brought into scope elsewhere.
vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
vae.load_json('vae/vae.json')
rnn.load_json('rnn/rnn.json')


def learn(network, env,
          seed=None,
          total_timesteps=None,
          nb_epochs=None, # with default settings, perform 1M steps total
          nb_epoch_cycles=20,
          nb_rollout_steps=100,
          reward_scale=1.0,
          render=False,
          render_eval=False,
          noise_type='adaptive-param_0.2',