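import tensorflow as tf  # TF 1.x API throughout (tf.placeholder, tf.train.shuffle_batch, tf.Session)

# Assumed defined elsewhere in this repo: `filename` (the TFRecords path),
# `read_and_decode_with_labels`, `sigmoid_kl_with_logits`, `get_vars`, and the
# `buffer` and `model` modules used by the agent below.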
def build_model_single_gpu(self, gpu_idx):
    assert not self.y_dim
    if gpu_idx == 0:
        filename_queue = tf.train.string_input_producer([filename])  # num_epochs=self.config.epoch
        self.get_image, self.get_label = read_and_decode_with_labels(filename_queue)
        with tf.variable_scope("misc"):
            chance = 1.  # TODO: declare this down below and make it 1. - 1. / num_classes
            avg_error_rate = tf.get_variable('avg_error_rate', [],
                                             initializer=tf.constant_initializer(0.),
                                             trainable=False)
            num_error_rate = tf.get_variable('num_error_rate', [],
                                             initializer=tf.constant_initializer(0.),
                                             trainable=False)

    images, sparse_labels = tf.train.shuffle_batch(
        [self.get_image, self.get_label],
        batch_size=self.batch_size,
        num_threads=2,
        capacity=1000 + 3 * self.batch_size,
        min_after_dequeue=1000,
        name='real_images_and_labels')

    if gpu_idx == 0:
        self.sample_images = tf.placeholder(tf.float32,
                                            [self.sample_size] + self.image_shape,
                                            name='sample_images')
        self.sample_labels = tf.placeholder(tf.int32, [self.sample_size],
                                            name="sample_labels")
        self.reference_G, self.reference_zs = self.generator(is_ref=True)

        # Since I don't know how to turn variable reuse off, I can only activate
        # it once. So here I build a dummy copy of the discriminator before
        # turning variable reuse on for the generator.
        dummy_joint = tf.concat([images, self.reference_G], 0)
        dummy = self.discriminator(dummy_joint, reuse=False, prefix="dummy")

    G, zs = self.generator(is_ref=False)
    if gpu_idx == 0:
        G_means = tf.reduce_mean(G, 0, keepdims=True)
        G_vars = tf.reduce_mean(tf.square(G - G_means), 0, keepdims=True)
        G = tf.Print(G, [tf.reduce_mean(G_means), tf.reduce_mean(G_vars)],
                     "generator mean and average var", first_n=1)
        image_means = tf.reduce_mean(images, 0, keepdims=True)
        image_vars = tf.reduce_mean(tf.square(images - image_means), 0, keepdims=True)
        images = tf.Print(images,
                          [tf.reduce_mean(image_means), tf.reduce_mean(image_vars)],
                          "image mean and average var", first_n=1)
        self.Gs = []
        self.zses = []
    self.Gs.append(G)
    self.zses.append(zs)

    joint = tf.concat([images, G], 0)
    class_logits, D_on_data, D_on_data_logits, D_on_G, D_on_G_logits = \
        self.discriminator(joint, reuse=True, prefix="joint ")
    # D_on_G_logits = tf.Print(D_on_G_logits, [D_on_G_logits], "D_on_G_logits")

    self.d_sum = tf.summary.histogram("d", D_on_data)
    self.d__sum = tf.summary.histogram("d_", D_on_G)
    self.G_sum = tf.summary.image("G", G)

    d_label_smooth = self.d_label_smooth
    d_loss_real = sigmoid_kl_with_logits(D_on_data_logits, 1. - d_label_smooth)
    class_loss_weight = 1.
    d_loss_class = class_loss_weight * tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=class_logits, labels=tf.to_int64(sparse_labels))
    error_rate = 1. - tf.reduce_mean(
        tf.to_float(tf.nn.in_top_k(class_logits, sparse_labels, 1)))
    # self.d_loss_class = tf.Print(self.d_loss_class, [error_rate],
    #                              "gpu " + str(gpu_idx) + " current minibatch error rate")
    if gpu_idx == 0:
        update = tf.assign(num_error_rate, num_error_rate + 1.)
        with tf.control_dependencies([update]):
            # Start off as a true average for the first 100 samples, then switch
            # to a running average to compensate for ongoing learning.
            tc = tf.maximum(.01, 1. / num_error_rate)
            update = tf.assign(avg_error_rate,
                               (1. - tc) * avg_error_rate + tc * error_rate)
        with tf.control_dependencies([update]):
            d_loss_class = tf.Print(d_loss_class, [avg_error_rate],
                                    "running top-1 error rate")

    # Do not smooth the negative targets.
    # If we use positive targets of alpha and negative targets of beta, then the
    # optimal discriminator function is
    #     D(x) = (alpha * p_data(x) + beta * p_generator(x)) / (p_data(x) + p_generator(x)).
    # This means if we want to get less extreme values, we shrink alpha.
    # Increasing beta makes the generator self-reinforcing.
    # Note that using this one-sided label smoothing also shifts the equilibrium
    # value to alpha / 2.
    d_loss_fake = tf.nn.sigmoid_cross_entropy_with_logits(
        logits=D_on_G_logits, labels=tf.zeros_like(D_on_G_logits))
    g_loss = sigmoid_kl_with_logits(D_on_G_logits, self.generator_target_prob)

    d_loss_class = tf.reduce_mean(d_loss_class)
    d_loss_real = tf.reduce_mean(d_loss_real)
    d_loss_fake = tf.reduce_mean(d_loss_fake)
    g_loss = tf.reduce_mean(g_loss)

    if gpu_idx == 0:
        self.g_losses = []
    self.g_losses.append(g_loss)

    d_loss = d_loss_real + d_loss_fake + d_loss_class
    if gpu_idx == 0:
        self.d_loss_reals = []
        self.d_loss_fakes = []
        self.d_loss_classes = []
        self.d_losses = []
    self.d_loss_reals.append(d_loss_real)
    self.d_loss_fakes.append(d_loss_fake)
    self.d_loss_classes.append(d_loss_class)
    self.d_losses.append(d_loss)
    # self.g_loss_sum = tf.summary.scalar("g_loss", self.g_loss)
    # self.d_loss_sum = tf.summary.scalar("d_loss", self.d_loss)
    if gpu_idx == 0:
        get_vars(self)
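# The losses above call sigmoid_kl_with_logits, which is not defined in this
# file. Below is a minimal sketch, assuming it computes KL(target || sigmoid(logits))
# as soft-target sigmoid cross-entropy minus the constant entropy of the target;
# the name and exact behavior of the repo's helper are assumptions here.
import numpy as np

def sigmoid_kl_with_logits_sketch(logits, targets):
    # `targets` is a scalar probability broadcast across the whole batch.
    assert isinstance(targets, float)
    if targets in [0., 1.]:
        entropy = 0.
    else:
        entropy = -targets * np.log(targets) - (1. - targets) * np.log(1. - targets)
    # KL(t || sigmoid(l)) = cross_entropy(t, sigmoid(l)) - H(t)
    return tf.nn.sigmoid_cross_entropy_with_logits(
        logits=logits, labels=tf.fill(tf.shape(logits), targets)) - entropy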
def __init__(self, max_length, state_size, output_size, hidden, n_step,
             batch_size, gamma, lr, train_size, update_size, activation):
    self.epsilon = 1.0
    self.sess = tf.Session()
    self.max_length = max_length
    self.state_size = state_size
    self.output_size = output_size
    self.hidden = hidden
    self.n_step = n_step
    self.batch_size = batch_size
    self.gamma = gamma
    self.lr = lr
    self.train_size = train_size
    self.update_size = update_size
    self.tau = 0.995
    self.activation = activation

    self.n_step_buffer = buffer.n_step_buffer(n_step=self.n_step)
    self.memory = buffer.Memory(capacity=int(self.max_length))

    # Flat observations get shape [None, state_size]; image-like observations
    # (state_size given as a list) get shape [None, *state_size].
    self.state_shape = [None]
    if type(self.state_size) == int:
        self.state_shape.append(self.state_size)
    elif type(self.state_size) == list:
        self.state_shape.extend(self.state_size)

    self.G_ph = tf.placeholder(tf.float32, shape=[None])             # n-step return targets
    self.x_ph = tf.placeholder(tf.float32, shape=self.state_shape)   # states
    self.a_ph = tf.placeholder(tf.int32, shape=[None])               # actions taken
    self.w = tf.placeholder(tf.float32, shape=[None])                # importance-sampling weights

    # Dueling network for flat inputs, CNN dueling network for image inputs.
    if type(self.state_size) == int:
        with tf.variable_scope('main'):
            self.main = model.dueling(self.x_ph, self.hidden,
                                      self.activation, self.output_size)
        with tf.variable_scope('target'):
            self.target = model.dueling(self.x_ph, self.hidden,
                                        self.activation, self.output_size)
    elif type(self.state_size) == list:
        with tf.variable_scope('main'):
            self.main = model.cnn_dueling(self.x_ph, self.hidden,
                                          self.activation, self.output_size)
        with tf.variable_scope('target'):
            self.target = model.cnn_dueling(self.x_ph, self.hidden,
                                            self.activation, self.output_size)

    # Q(s, a) for the actions actually taken.
    self.one_hot_a_ph = tf.one_hot(self.a_ph, depth=self.output_size)
    self.main_q_value = tf.reduce_sum(self.main * self.one_hot_a_ph, axis=1)

    # Prioritized-replay loss: squared TD error weighted by importance weights.
    self.unweighted_loss = ((self.G_ph - self.main_q_value) ** 2) * 0.5
    self.per_loss = tf.reduce_mean(self.unweighted_loss * self.w)
    self.per_optimizer = tf.train.AdamOptimizer(self.lr)
    self.per_train = self.per_optimizer.minimize(self.per_loss)

    self.main_params = model.get_vars('main')
    self.target_params = model.get_vars('target')

    # Hard copy main -> target, and Polyak (soft) target update with rate tau.
    self.op_holder = tf.group([
        tf.assign(v_targ, v_main)
        for v_main, v_targ in zip(model.get_vars('main'), model.get_vars('target'))
    ])
    self.target_update = tf.group([
        tf.assign(v_targ, self.tau * v_targ + (1 - self.tau) * v_main)
        for v_main, v_targ in zip(model.get_vars('main'), model.get_vars('target'))
    ])

    self.sess.run(tf.global_variables_initializer())
    self.saver = tf.train.Saver()

    # Placeholders and ops for writing externally supplied parameter values
    # into the main and target networks.
    self.from_list_main = [
        tf.placeholder(tf.float32, i.get_shape()) for i in self.main_params
    ]
    self.from_list_target = [
        tf.placeholder(tf.float32, i.get_shape()) for i in self.target_params
    ]
    self.write_main_parameter = tf.group([
        tf.assign(v_targ, v_main)
        for v_main, v_targ in zip(self.from_list_main, self.main_params)
    ])
    self.write_target_parameter = tf.group([
        tf.assign(v_targ, v_main)
        for v_main, v_targ in zip(self.from_list_target, self.target_params)
    ])
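# The G_ph placeholder above receives n-step return targets. A minimal sketch of
# how such a target could be computed, assuming `rewards` holds the n most recent
# rewards and `bootstrap_q` is the target network's value estimate at the n-th
# next state (both names are illustrative, not taken from this repo):
def n_step_return_sketch(rewards, bootstrap_q, gamma):
    """G = r_0 + gamma*r_1 + ... + gamma**(n-1)*r_{n-1} + gamma**n * bootstrap_q."""
    G = bootstrap_q
    for r in reversed(rewards):
        G = r + gamma * G
    return G

# Example: n_step_return_sketch([1., 0., 1.], bootstrap_q=2., gamma=0.99)
# == 1. + 0.99*0. + 0.99**2 * 1. + 0.99**3 * 2.  (~3.9207)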