def __init__(self, buffer_size, resolution, oauth, channel):
    """Record the connection settings and allocate the ring buffer.

    No stream is opened here: ``stream`` is left as ``None`` and
    ``initialized`` as ``False``.

    Args:
        buffer_size: Kept on the instance and forwarded to ``RingBuffer``
            as its ``buffer_size`` keyword.
        resolution: Stored as ``self.resolution``.
        oauth: Stored as ``self.oauth``.
        channel: Stored as ``self.channel``.
    """
    # Connection settings, stored verbatim.
    self.oauth, self.resolution, self.channel = oauth, resolution, channel

    self.buffer_size = buffer_size
    self.buffer = RingBuffer(buffer_size=buffer_size)

    # Stream state: nothing has been opened yet.
    self.initialized, self.stream = False, None
def test_len(self):
    """Check ``len`` after each of four appends into a ``RingBuffer(3)``."""
    buf = RingBuffer(3)
    # (item to append, length asserted right after that append); the fourth
    # append is asserted to leave the length at 3.
    steps = (
        ("test1", 1),
        ("test2", 2),
        ("test3", 3),
        ("test4", 3),
    )
    for item, expected_length in steps:
        buf.append(item)
        self.assertEqual(len(buf), expected_length)
def __init__(self, env, action_size, config):
    """Create the agent from the options exposed by ``config``.

    Args:
        env: Stored as ``self.env``.
        action_size: Stored as ``self.action_size`` and passed to
            ``DQNModel``.
        config: Object whose ``config_section_map()`` returns a mapping
            with the keys ``memorysize``, ``gamma``, ``epsilon``,
            ``epsilonmin``, ``epsilondecay`` and ``learningrate``.

    Raises:
        KeyError: If one of the option keys is missing (assuming the
            mapping behaves like a ``dict``).
        ValueError: If an option cannot be converted by ``int``/``float``.
    """
    # Resolve the section once instead of once per option.
    options = config.config_section_map()

    self.memory = RingBuffer(int(options['memorysize']))
    self.gamma = float(options['gamma'])  # discount rate
    self.epsilon = float(options['epsilon'])  # exploration rate
    self.epsilon_min = float(options['epsilonmin'])
    self.epsilon_decay = float(options['epsilondecay'])
    self.learning_rate = float(options['learningrate'])
    self.action_size = action_size
    self.env = env
    self.dqn_model = DQNModel(self.learning_rate, action_size)
def __init__(self, window):
    """Create the tracker and feed it ``window`` positive observations.

    Args:
        window: Passed to ``RingBuffer`` and used as the number of initial
            ``update(True)`` calls.
    """
    self.window = window
    self.buffer = RingBuffer(window)
    self.reward = 1.0
    self.lowest = self.reward
    # ``range`` exists on both Python 2 and 3 (``xrange`` is Python 2 only);
    # the loop index itself is not needed.
    for _ in range(window):
        self.update(True)
def test_get_item_full(self):
    """Index a ``RingBuffer(3)`` after five appends.

    Asserts that indices 0..2 return the last three appended items.
    """
    buf = RingBuffer(3)
    for item in ("test1", "test2", "test3", "test4", "test5"):
        buf.append(item)

    survivors = ("test3", "test4", "test5")
    for index, expected in enumerate(survivors):
        self.assertEqual(buf[index], expected)
class RewardAndPunishment:
    """Reward and Punishment mechanism inspired by "Modeling and Assessing
    Quality of Information in Multisensor Multimedia Monitoring Systems" by
    Hossain et al.

    Observations are stored in a ``RingBuffer`` as ``1.0`` or ``0.0``.
    ``reward`` is adjusted on every ``update`` and ``lowest`` tracks the
    smallest reward seen while the adjustment formula was in use.
    """

    def __init__(self, window):
        """Create the tracker and feed it ``window`` positive observations.

        Args:
            window: Passed to ``RingBuffer`` and used as the number of
                initial ``update(True)`` calls.
        """
        self.window = window
        self.buffer = RingBuffer(window)
        self.reward = 1.0
        self.lowest = self.reward
        # ``range`` exists on both Python 2 and 3 (``xrange`` does not).
        for _ in range(window):
            self.update(True)

    def update(self, truthHold):
        """Record one observation and adjust ``reward`` accordingly.

        Args:
            truthHold: The observation. It is stored as ``1.0`` when
                ``truthHold != 0`` and as ``0.0`` otherwise.
        """
        # Both statistics are taken BEFORE the new sample is added.
        # Counting with a generator works on Python 2 and 3, whereas
        # ``len(filter(...))`` raises TypeError on Python 3.
        # presumably ``items[1:]`` skips the entry about to be evicted -
        # verify against RingBuffer.
        alpha_w_minus_1 = float(
            sum(1 for x in self.buffer.items[1:] if x == 1))
        w_minus_1 = self.buffer.fillLevel() - 1

        alpha = 1.0 if truthHold != 0 else 0.0
        self.buffer.add(alpha)

        if w_minus_1 > 0:
            # Difference between the positive ratio without and with the
            # new sample; the reward moves by twice that difference.
            r_p = (alpha_w_minus_1 / w_minus_1) - (
                (alpha_w_minus_1 + alpha) / (w_minus_1 + 1))
            self.reward -= 2 * r_p
            self.lowest = min(self.lowest, self.reward)
        else:
            # Not enough history for a ratio: take the observation itself.
            self.reward = 1.0 if truthHold else 0.0

    def value(self):
        """Return ``abs(self.reward)`` capped at 1.

        ``abs`` can never be negative, so only the upper bound needs
        clamping.
        """
        return min(abs(self.reward), 1)
def test_iter(self):
    """Iterate a ``RingBuffer(3)`` twice after five appends.

    Both passes are asserted to yield the last three appended items, in
    order.
    """
    buf = RingBuffer(3)
    for item in ("test1", "test2", "test3", "test4", "test5"):
        buf.append(item)

    first_pass = [item for item in buf]
    self.assertListEqual(first_pass, ["test3", "test4", "test5"])

    # A second, independent iteration over the same buffer.
    second_pass = [item for item in buf]
    self.assertListEqual(second_pass, ["test3", "test4", "test5"])
def test_threading(self):
    """Run two ``TestIterThreading`` workers over one shared buffer.

    Each worker's ``actual`` list is asserted to equal the three appended
    items. ``TestIterThreading`` is defined on the test class outside this
    snippet; presumably it is a ``threading.Thread`` that iterates the
    buffer into ``actual`` - verify there.
    """
    buf = RingBuffer(3)
    for item in ("test1", "test2", "test3"):
        buf.append(item)

    worker_cls = self.__class__.TestIterThreading
    workers = [worker_cls(buf), worker_cls(buf)]

    # Start both before joining either one.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join(3)

    for worker in workers:
        self.assertListEqual(worker.actual, ["test1", "test2", "test3"])
def test_get_item_out_of_index(self):
    """Indexing ``buf[1]`` on a ``RingBuffer(1)`` must raise ``IndexError``.

    ``assertRaises`` is used so the test fails when no exception is raised
    at all; a try/except around the access would pass silently then.
    """
    buf = RingBuffer(1)
    with self.assertRaises(IndexError):
        buf[1]
def test_get_item_key_is_not_int(self):
    """Indexing with the string ``"0"`` must raise ``TypeError``.

    ``assertRaises`` is used so the test fails when no exception is raised
    at all; a try/except around the access would pass silently then.
    """
    buf = RingBuffer(1)
    with self.assertRaises(TypeError):
        buf["0"]
class DQNAgent:
    """Deep Q-learning agent with an experience-replay memory.

    Transitions are kept in a ``RingBuffer``; ``replay`` samples a
    mini-batch from it and fits ``dqn_model.model`` against targets
    computed with ``dqn_model.target_model``.
    """

    def __init__(self, env, action_size, config):
        """Create the agent from the options exposed by ``config``.

        Args:
            env: Stored as ``self.env``.
            action_size: Stored as ``self.action_size`` and passed to
                ``DQNModel``.
            config: Object whose ``config_section_map()`` returns a mapping
                with the keys ``memorysize``, ``gamma``, ``epsilon``,
                ``epsilonmin``, ``epsilondecay`` and ``learningrate``.
        """
        # Resolve the section once instead of once per option.
        options = config.config_section_map()

        self.memory = RingBuffer(int(options['memorysize']))
        self.gamma = float(options['gamma'])  # discount rate
        self.epsilon = float(options['epsilon'])  # exploration rate
        self.epsilon_min = float(options['epsilonmin'])
        self.epsilon_decay = float(options['epsilondecay'])
        self.learning_rate = float(options['learningrate'])
        self.action_size = action_size
        self.env = env
        self.dqn_model = DQNModel(self.learning_rate, action_size)

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory.

        Both states are cast to ``uint8`` and the reward is replaced by its
        sign (``np.sign``) before the tuple is stored.
        """
        state = state.astype('uint8')
        next_state = next_state.astype('uint8')
        reward = np.sign(reward)
        self.memory.append((state, action, reward, next_state, done))

    def action(self, fi_t, env_sample, csv_handler):
        """Pick an action epsilon-greedily.

        Args:
            fi_t: Model input for the current step; a leading batch axis is
                added before prediction.
            env_sample: Value returned unchanged when exploring.
            csv_handler: Receives the predicted Q-values through
                ``write_q_values`` (greedy branch only).

        Returns:
            ``env_sample`` with probability ``epsilon``, otherwise the
            index of the largest predicted Q-value.
        """
        if random.uniform(0, 1) <= self.epsilon:
            # with probability epsilon do a random action
            return env_sample

        fi_t = np.expand_dims(fi_t, axis=0)
        q_values = self.dqn_model.model.predict(
            [fi_t, np.ones([1, self.action_size])])
        csv_handler.write_q_values(q_values)
        return np.argmax(q_values[0])

    def replay(self, batch_size, csv_logger):
        """Fit the online model on a random mini-batch from memory.

        Targets are ``reward + gamma * max_a Q_target(next_state, a)``,
        with the target-network Q-values zeroed for transitions whose
        ``done`` flag is set.

        Args:
            batch_size: Number of transitions to sample and fit on.
            csv_logger: Passed to ``fit`` as the only callback.
        """
        # The action axis follows ``self.action_size`` (as in ``action``)
        # rather than a hard-coded 4.
        n_actions = self.action_size
        states = np.zeros((batch_size, 4, 84, 84), dtype='float32')
        actions = np.zeros((batch_size, n_actions), dtype='uint8')
        rewards = np.zeros(batch_size, dtype='float32')
        next_states = np.zeros((batch_size, 4, 84, 84), dtype='float32')
        dones = np.ones((batch_size, n_actions), dtype=bool)

        # sample random mini_batch from D
        mini_batch = self.get_minibatch(batch_size)
        for i, (state, action, reward, next_state, done) in enumerate(
                mini_batch):
            # Assignment into the float32 arrays performs the cast.
            states[i] = state
            actions[i][action] = 1  # one-hot encoding of the taken action
            rewards[i] = reward
            next_states[i] = next_state
            dones[i] = done  # broadcast over the action axis

        next_state_q_values = self.dqn_model.target_model.predict(
            [next_states, np.ones(actions.shape)])
        next_state_q_values[dones] = 0
        q_values = rewards + self.gamma * np.max(next_state_q_values, axis=1)

        # Trains the model for a fixed number of epochs (iterations on a
        # dataset). The one-hot mask keeps only the taken action's target.
        self.dqn_model.model.fit([states, actions],
                                 actions * q_values[:, None],
                                 batch_size=batch_size,
                                 verbose=0,
                                 callbacks=[csv_logger])

    def get_minibatch(self, batch_size):
        """Return ``batch_size`` transitions sampled with replacement.

        Each index is drawn as ``randint(0, len(self.memory) - 1)``.
        """
        return [
            self.memory[randint(0, len(self.memory) - 1)]
            for _ in range(batch_size)
        ]

    def load(self, name):
        """Load model weights from ``name`` and sync the target model."""
        self.dqn_model.model.load_weights(name)
        self.dqn_model.update_target_model()

    def save(self, name):
        """Save the online model's weights to ``name``."""
        self.dqn_model.model.save_weights(name)

    def decrease_epsilone(self):
        """Lower ``epsilon`` by ``epsilon_decay``, never below ``epsilon_min``."""
        if self.epsilon > self.epsilon_min:
            # Clamp so the last decay step cannot undershoot the floor.
            self.epsilon = max(self.epsilon_min,
                               self.epsilon - self.epsilon_decay)
class Twitch(object):
    """Reads a Twitch stream through Livestreamer into a ring buffer."""

    RING_BUFFER_SIZE_KEY = 'ringbuffer-size'
    OAUTH_TOKEN_KEY = 'oauth_token'
    LIVESTREAMER_PLUGIN_TWITCH = 'twitch'

    def __init__(self, buffer_size, resolution, oauth, channel):
        """Store the connection settings; no stream is opened yet.

        Args:
            buffer_size: Size handed to ``RingBuffer``, to the Livestreamer
                ``ringbuffer-size`` option and to every ``stream.read``.
            resolution: Key used to pick one entry from the streams
                returned by Livestreamer.
            oauth: Value set as the Twitch plugin's ``oauth_token`` option.
            channel: Channel name inserted into the stream URL.
        """
        self.oauth = oauth
        self.resolution = resolution
        self.channel = channel
        self.buffer_size = buffer_size
        self.buffer = RingBuffer(buffer_size=buffer_size)
        self.initialized = False
        self.stream = None

    def __del__(self):
        """Close the open stream, if any, when the object is collected."""
        self._close_stream()

    def _close_stream(self):
        """Close and forget the current stream and reset ``initialized``.

        Uses ``getattr`` so it is safe on an instance whose ``__init__``
        did not run to completion.
        """
        stream = getattr(self, 'stream', None)
        self.stream = None
        self.initialized = False
        if stream is not None:
            stream.close()

    def initialize(self):
        """Clear the buffer and (re)open the stream.

        A previously opened stream is closed first so repeated calls do not
        leak handles. ``initialized`` becomes ``True`` only after
        ``open()`` has returned; when no stream is available for the
        configured resolution the instance stays uninitialised.
        """
        self.buffer.clear()
        self._close_stream()
        stream = self._init_stream(self.oauth, self.channel)
        if stream:
            self.stream = stream.open()
            self.initialized = True

    def get_stream_data(self):
        """Return everything currently held in the buffer.

        Raises:
            StreamBufferIsEmptyException: If the stream was not initialised
                when called; an initialisation attempt is made first.
        """
        if not self.initialized:
            print('Read: Try to initialize')
            self.initialize()
            raise StreamBufferIsEmptyException
        return self.buffer.read_all()

    def update_stream_data(self):
        """Read up to ``buffer_size`` from the stream into the buffer.

        When the stream is not initialised, or the read returns no data,
        an initialisation attempt is made instead.
        """
        if self.initialized:
            data = self.stream.read(self.buffer_size)
            print('Update: {length}'.format(length=len(data)))
            if len(data) != 0:
                self.buffer.write(data)
                return
        print('Update: Try to initialize')
        self.initialize()

    def stream_initialized(self):
        """Return ``True`` when a stream object is currently held."""
        return self.stream is not None

    def _init_stream(self, oauth, channel):
        """Look up the channel's stream for the configured resolution.

        Returns:
            The entry for ``self.resolution`` from the session's streams,
            or ``None`` when ``streams.get`` finds no such entry.
        """
        session = Livestreamer()
        session.set_plugin_option(self.LIVESTREAMER_PLUGIN_TWITCH,
                                  self.OAUTH_TOKEN_KEY, oauth)
        session.set_option(self.RING_BUFFER_SIZE_KEY, self.buffer_size)
        streams = session.streams(self._generate_stream_url(channel))
        return streams.get(self.resolution)

    @staticmethod
    def _generate_stream_url(channel):
        """Return the twitch.tv URL for ``channel``."""
        return 'https://www.twitch.tv/{channel}'.format(channel=channel)
def main(argv):
    """Build the GAN graph and alternate discriminator/generator updates.

    Configuration is read from ``FLAGS``. The discriminator is trained on
    half a batch of real images plus half a batch of generated ones, with
    real images labelled 0 and generated images labelled 1 (see
    ``D_expected_real`` / ``D_expected_fake``). Training runs inside a
    ``MonitoredTrainingSession`` until the ``StopAtStepHook`` requests a
    stop.

    Args:
        argv: Unused.
    """
    del argv
    config = FLAGS

    tf.set_random_seed(config.seed)
    np_state = np.random.RandomState(config.seed)

    global_step = tf.train.get_or_create_global_step()
    global_step_update = tf.assign(global_step, global_step + 1)

    # Input pipeline for the real images.
    real_ds = tf.data.TFRecordDataset(config.input_path)
    real_ds = real_ds.map(lambda x: _parse_record(x, config.image_size))
    real_ds = real_ds.shuffle(buffer_size=1000)
    real_ds = real_ds.batch(config.batch_size // 2)  # Half will be generated
    real_ds = real_ds.repeat()
    real_ds_iterator = real_ds.make_one_shot_iterator()
    real_ds_example = real_ds_iterator.get_next()

    discriminator = Discriminator('discriminator')
    generator = Generator('generator')

    z = tf.placeholder(dtype=tf.float32, shape=[None, 100])
    G_sample = generator.create_main_graph(z)
    D_logit_real = discriminator.create_main_graph(real_ds_example)
    D_logit_fake = discriminator.create_main_graph(G_sample)

    D_expected_real = tf.zeros_like(D_logit_real)
    D_expected_fake = tf.ones_like(D_logit_fake)

    D_loss_real = tf.losses.sigmoid_cross_entropy(D_expected_real,
                                                  D_logit_real,
                                                  label_smoothing=0.2)
    D_loss_fake = tf.losses.sigmoid_cross_entropy(D_expected_fake,
                                                  D_logit_fake,
                                                  label_smoothing=0.00)
    D_loss = 0.5 * (D_loss_real + D_loss_fake)
    # The generator is trained to make the discriminator emit the "real"
    # label (0) for generated samples.
    G_loss = tf.losses.sigmoid_cross_entropy(tf.zeros_like(D_logit_fake),
                                             D_logit_fake,
                                             label_smoothing=0.00)

    with tf.variable_scope('metrics'):
        D_prediction_real = tf.round(tf.nn.sigmoid(D_logit_real))
        D_prediction_fake = tf.round(tf.nn.sigmoid(D_logit_fake))
        D_accuracy_real = accuracy(D_prediction_real, D_expected_real)
        D_accuracy_fake = accuracy(D_prediction_fake, D_expected_fake)
        real_size = tf.to_float(tf.shape(D_prediction_real)[0])
        fake_size = tf.to_float(tf.shape(D_prediction_fake)[0])
        # Size-weighted mean of the two per-source accuracies.
        D_accuracy = (real_size * D_accuracy_real +
                      fake_size * D_accuracy_fake) / (real_size + fake_size)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                   scope='discriminator')
    with tf.control_dependencies(update_ops):
        D_optimizer = tf.train.AdamOptimizer(
            config.discriminator_learning_rate).minimize(
                D_loss, var_list=discriminator.get_variables())

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope='generator')
    with tf.control_dependencies(update_ops):
        G_optimizer = tf.train.AdamOptimizer(
            config.generator_learning_rate).minimize(
                G_loss, var_list=generator.get_variables())

    # The summary ops are picked up by merge_all(), so their handles are
    # not kept.
    with tf.variable_scope('summaries'):
        tf.summary.scalar('loss', D_loss, family='discriminator')
        tf.summary.scalar('real_accuracy', D_accuracy_real,
                          family='discriminator')
        tf.summary.scalar('fake_accuracy', D_accuracy_fake,
                          family='discriminator')
        tf.summary.scalar('accuracy', D_accuracy, family='discriminator')
        tf.summary.scalar('loss', G_loss, family='generator')
        tf.summary.image('generation', G_sample, max_outputs=1,
                         family='generator')
        tf.summary.image('real', real_ds_example, max_outputs=1)
    summary_op = tf.summary.merge_all()

    # Session
    hooks = [tf.train.StopAtStepHook(num_steps=config.iterations)]
    if config.save_checkpoints:
        hooks.append(
            tf.train.CheckpointSaverHook(
                checkpoint_dir=config.checkpoint_directory,
                save_secs=config.checkpoint_save_secs,
                save_steps=config.checkpoint_save_steps))
    if config.save_summaries:
        hooks.append(
            tf.train.SummarySaverHook(output_dir=config.summary_directory,
                                      save_secs=config.summary_save_secs,
                                      save_steps=config.summary_save_steps,
                                      summary_op=summary_op))

    def step_generator(step_context, accuracy_buffer):
        """Run one generator update on a full batch of noise."""
        np_global_step = step_context.session.run(global_step)
        step_context.session.run(global_step_update)

        random_noise = np_state.normal(size=[config.batch_size, 100])
        _, np_loss, np_accuracy = step_context.run_with_hooks(
            [G_optimizer, G_loss, D_accuracy], feed_dict={z: random_noise})
        accuracy_buffer.add(np_accuracy)
        if np_global_step % config.log_step == 0:
            logging.debug(
                'Training Generator: Step: {} Loss: {:.3e} Accuracy: {:.2f}'
                .format(np_global_step, np_loss,
                        accuracy_buffer.mean() * 100))

    def step_discriminator(step_context, accuracy_buffer):
        """Run one discriminator update on half a batch of noise."""
        np_global_step = step_context.session.run(global_step)
        step_context.session.run(global_step_update)

        random_noise = np_state.normal(size=[config.batch_size // 2, 100])
        _, np_loss, np_accuracy = step_context.run_with_hooks(
            [D_optimizer, D_loss, D_accuracy], feed_dict={z: random_noise})
        accuracy_buffer.add(np_accuracy)
        if np_global_step % config.log_step == 0:
            logging.debug(
                'Training Discriminator: Step: {} Loss Mean: {:.3e} Accuracy: {:.2f}'
                .format(np_global_step, np_loss,
                        accuracy_buffer.mean() * 100))

    accuracy_buffer = RingBuffer(config.buffer_size)
    accuracy_buffer.clear()

    def run_steps(sess, count, step_fn):
        """Run ``step_fn`` up to ``count`` times, stopping early on request."""
        for _ in range(count):
            # Running a monitored session after a stop was requested
            # raises, so check before every step.
            if sess.should_stop():
                return
            sess.run_step_fn(
                lambda step_context: step_fn(step_context, accuracy_buffer))

    # Only restore from the checkpoint directory when asked to; None is the
    # session's default for checkpoint_dir.
    checkpoint_dir = config.checkpoint_directory if config.restore else None

    # The context manager closes the session even when a step raises.
    with tf.train.MonitoredTrainingSession(checkpoint_dir=checkpoint_dir,
                                           save_checkpoint_steps=None,
                                           save_checkpoint_secs=None,
                                           save_summaries_steps=None,
                                           save_summaries_secs=None,
                                           log_step_count_steps=None,
                                           hooks=hooks) as sess:
        while not sess.should_stop():
            run_steps(sess, config.D_steps, step_discriminator)
            run_steps(sess, config.G_steps, step_generator)