def test_large_var(self):
    """Timing smoke test for ReplayMemory with a large (50, 50, 6) state.

    Run on its own with:
        python replay_memory_test.py TestReplayMemory.test_large_var

    Constructs a ReplayMemory, then ten times over adds one random-length
    episode and runs a trivial graph over a batch requested with
    batch_size=128, printing the stopwatch reading for each step. Nothing
    is asserted; the test only fails if one of the steps raises.
    """

    def report(*fields):
        # Space-join the fields ourselves so the output matches what the
        # old `print a, b, c` statements produced; a single-argument
        # print(...) parses as both a Python 2 statement and a Python 3 call.
        print(" ".join(str(field) for field in fields))

    def random_s():
        return np.random.random(state_shape)

    stopwatch = StopWatch()
    state_shape = (50, 50, 6)

    # Time construction plus variable initialisation together.
    stopwatch.reset()
    rm = ReplayMemory(self.sess, buffer_size=10000, state_shape=state_shape,
                      action_dim=2, load_factor=1.5)
    self.sess.run(tf.initialize_all_variables())
    report("cstr_and_init", stopwatch.time())

    bs1, bs1i, bs2, bs2i = rm.batch_ops()

    # build a simple, useless, net that uses state_1 & state_2 idxs
    # we want this to reduce to a single value to minimise data coming
    # back from GPU
    added_states = bs1 + bs2
    total_value = tf.reduce_sum(added_states)

    for episode in range(10):
        # add an episode to rm
        episode_len = random.choice([5, 7, 9, 10, 15])
        initial_state = random_s()

        # Action / reward are arbitrary markers derived from a step id that
        # starts just past the episode index; ids therefore overlap between
        # episodes. The step id has its own name so it no longer clobbers
        # the episode index (the old trailing `i += ...` was a no-op since
        # the for loop rebinds its variable on every iteration).
        action_reward_state = []
        for step in range(episode + 1, episode + episode_len + 1):
            action = (step * 10) + 7
            reward = (step * 10) + 8
            action_reward_state.append((action, reward, random_s()))

        stopwatch.reset()
        rm.add_episode(initial_state, action_reward_state)
        elapsed = stopwatch.time()
        # +1 accounts for the initial state on top of the per-step states.
        num_states = len(action_reward_state) + 1
        report("add_episode_time", elapsed, "#states=", num_states,
               "=> s/state", elapsed / num_states)

        # get a random batch state
        batch = rm.batch(batch_size=128)
        stopwatch.reset()
        total = self.sess.run(total_value,
                              feed_dict={bs1i: batch.state_1_idx,
                                         bs2i: batch.state_2_idx})
        report("fetch_and_run", total, stopwatch.time())