def is_duplicate(endpoints):
    """Simple duplicate filter based on the mean L1 difference in RGB.

    Args:
      endpoints: mapping with an 'rgb' entry whose elements 0 and 1 are the
        two images being compared.

    Returns:
      Boolean tensor that is True when the mean absolute difference between
      `endpoints['rgb'][1]` and `endpoints['rgb'][0]` is strictly greater
      than `params.input.duplicates_filter_threshold`.
    """
    rgb = endpoints['rgb']
    mean_abs_diff = tf.reduce_mean(tf.abs(rgb[1] - rgb[0]))
    threshold = params.input.duplicates_filter_threshold
    return tf.greater(mean_abs_diff, threshold)
def next_timestep(self, state, action):
    '''Calculate the next state of the boat after one timestep

    Size of tensors' first dimension is the batch size for parallel
    computation.

    Params
    ======
        state: rank-2 tensor; columns 0:3 are eta and columns 3:6 are upsilon
            (presumably pose [x, y, heading] and body-frame velocities
            [surge, sway, yaw rate] -- TODO confirm against the caller)
        action: rank-2 tensor; column 0 is the port thrust and column 1 is
            the starboard thrust

    Returns
    ======
        next_state: rank-2 tensor [eta, upsilon] after one explicit Euler
            step of length self.train_dt; eta[:, 2] is wrapped once by
            2*pi whenever its magnitude exceeds pi
        reward: whatever self.get_reward(next_state) returns
    '''
    physics = self.boat.physics
    eta = state[:, 0:3]
    upsilon = state[:, 3:6]
    u = upsilon[:, 0]
    v = upsilon[:, 1]
    r = upsilon[:, 2]
    Tport = action[:, 0]
    Tstbd = action[:, 1]
    zeros = tf.zeros([self.batch_size], dtype=tf.float32)
    ones = tf.ones([self.batch_size], dtype=tf.float32)

    def per_sample(value):
        # One float32 copy of `value` per batch element.
        return tf.constant(value, dtype=tf.float32, shape=[self.batch_size])

    # Damping coefficients switch at |upsilon[:, 0]| = 1.2.
    below_switch = tf.less(tf.abs(u), 1.2)
    Xu = tf.where(below_switch, per_sample(-0.25), per_sample(64.55))
    Xuu = tf.where(below_switch, per_sample(0.0), per_sample(-70.92))

    speed = tf.sqrt(tf.pow(u, 2) + tf.pow(v, 2))
    Yv = 0.5*(-40*1000*tf.abs(v)) * \
        (1.1+0.0045*(1.01/0.09) - 0.1*(0.27/0.09)+0.016*(tf.pow((0.27/0.09), 2)))
    Yr = 6*(-3.141592*1000) * speed*0.09*0.09*1.01
    Nv = 0.06*(-3.141592*1000) * speed*0.09*0.09*1.01
    Nr = 0.02*(-3.141592*1000) * speed*0.09*0.09*1.01*1.01

    # Mass matrix (rigid body minus added-mass terms); shared by the batch.
    M = tf.constant([[physics.m - physics.X_u_dot, 0, 0],
                     [0, physics.m - physics.Y_v_dot, 0 - physics.Y_r_dot],
                     [0, 0 - physics.N_v_dot, physics.Iz - physics.N_r_dot]])

    # Thrust vector, reshaped to one [3, 1] column per batch element.
    T = tf.stack([Tport + physics.c*Tstbd,
                  zeros,
                  0.5 * physics.B*(Tport - physics.c*Tstbd)], axis=1)
    T = tf.reshape(T, [self.batch_size, 3, 1])

    # The 3x3 matrices below are stacked as [3, 3, batch] and then
    # transposed to [batch, 3, 3] so they can be used in batched matmul.
    CRB = tf.stack([[zeros, zeros, -physics.m * v],
                    [zeros, zeros, physics.m * u],
                    [physics.m * v, -physics.m * u, zeros]])
    CRB = tf.transpose(CRB, perm=[2, 0, 1])

    CA = tf.stack([[zeros, zeros,
                    2 * ((physics.Y_v_dot*v) +
                         ((physics.Y_r_dot + physics.N_v_dot)/2) * r)],
                   [zeros, zeros, -physics.X_u_dot * physics.m * u],
                   [2*(((-physics.Y_v_dot) * v) -
                       ((physics.Y_r_dot+physics.N_v_dot)/2) * r),
                    physics.X_u_dot * physics.m * u,
                    zeros]])
    CA = tf.transpose(CA, perm=[2, 0, 1])
    C = CRB + CA

    Dl = tf.stack([[-Xu, zeros, zeros],
                   [zeros, -Yv, -Yr],
                   [zeros, -Nv, -Nr]])
    Dl = tf.transpose(Dl, perm=[2, 0, 1])

    Dn = tf.stack([[Xuu * tf.abs(u), zeros, zeros],
                   [zeros,
                    physics.Yvv * tf.abs(v) + physics.Yvr * tf.abs(r),
                    physics.Yrv * tf.abs(v) + physics.Yrr * tf.abs(r)],
                   [zeros,
                    physics.Nvv * tf.abs(v) + physics.Nvr * tf.abs(r),
                    physics.Nrv * tf.abs(v) + physics.Nrr * tf.abs(r)]])
    Dn = tf.transpose(Dn, perm=[2, 0, 1])
    D = Dl - Dn

    upsilon = tf.reshape(upsilon, [self.batch_size, 3, 1])
    upsilon_dot = tf.matmul(
        tf.linalg.inv(M),
        (T - tf.matmul(C, upsilon) - tf.matmul(D, upsilon)))
    upsilon = (self.train_dt) * upsilon_dot + upsilon  # integral

    # Rotation by the (pre-update) heading eta[:, 2].
    J = tf.stack([[tf.cos(eta[:, 2]), -tf.sin(eta[:, 2]), zeros],
                  [tf.sin(eta[:, 2]), tf.cos(eta[:, 2]), zeros],
                  [zeros, zeros, ones]])
    J = tf.transpose(J, perm=[2, 0, 1])
    eta_dot = tf.matmul(J, upsilon)  # transformation into local reference frame
    eta = tf.reshape(eta, [self.batch_size, 3, 1])
    eta = (self.train_dt)*eta_dot + eta  # integral

    eta = tf.reshape(eta, [self.batch_size, 3])
    upsilon = tf.reshape(upsilon, [self.batch_size, 3])

    # Wrap the heading back towards [-pi, pi]. This is done element-wise with
    # tf.where (tf.cond needs a scalar predicate) and works for any batch
    # size. tf.sign equals heading/|heading| wherever the wrap branch is
    # selected, and avoids a 0/0 in the branch that is not selected.
    heading = eta[:, 2]
    heading = tf.where(tf.greater(tf.abs(heading), np.pi),
                       tf.sign(heading) * (tf.abs(heading) - 2 * np.pi),
                       heading)
    eta = tf.concat([eta[:, 0:2], tf.expand_dims(heading, axis=1)], axis=1)

    next_state = tf.concat([eta, upsilon], axis=1)
    reward = self.get_reward(next_state)
    return next_state, reward
def compute_error(real, fake, mask):
    """Mean of the mask-weighted absolute difference between `fake` and `real`.

    The mean runs over every element of the product, so it is divided by the
    total element count rather than by the sum of `mask`.
    """
    abs_residual = tf.abs(fake - real)
    weighted_residual = mask * abs_residual
    return tf.reduce_mean(weighted_residual)
def EffectiveSampleSize(states,
                        filter_beyond_lag=300,
                        filter_threshold=0.05,
                        center=True,
                        normalize=True):
    """ESS computation for one single Tensor argument.

    Args:
      states: tensor (or convertible) whose axis 0 indexes the samples; the
        auto-correlation is averaged over axis 1, so rank >= 2 is required.
      filter_beyond_lag: forwarded as `max_lags` to SanitizedAutoCorrelation.
      filter_threshold: if not None, every lag from the first one whose
        |auto-correlation| falls below this value onwards is zeroed out.
      center: forwarded to SanitizedAutoCorrelation.
      normalize: forwarded to SanitizedAutoCorrelation.

    Returns:
      N / (1 + 2 * sum_{k>=1} (N - k) / N * R[k]), where N is the size of
      axis 0 of `states` and R is the (filtered) auto-correlation.
    """

    def _axis_size(x, axis=None):
        """Get number of elements of `x` in `axis`, as type `x.dtype`."""
        if axis is not None:
            dims = tf.gather(tf.shape(x), axis)
            return tf.cast(tf.reduce_prod(dims), x.dtype)
        return tf.cast(tf.size(x), x.dtype)

    with tf.name_scope(
            "effective_sample_size_single_state",
            values=[states, filter_beyond_lag, filter_threshold]):
        states = tf.convert_to_tensor(states, name="states")
        dtype = states.dtype

        # filter_beyond_lag == None ==> acf is the full sequence.
        acf = SanitizedAutoCorrelation(
            states,
            axis=0,
            center=center,
            normalize=normalize,
            max_lags=filter_beyond_lag)
        acf = tf.reduce_mean(acf, 1)

        if filter_threshold is not None:
            filter_threshold = tf.convert_to_tensor(
                filter_threshold, dtype=dtype, name="filter_threshold")
            # Build a mask that is 1 up to (not including) the first lag whose
            # |acf| is below the threshold, and 0 from there on.
            # Example: acf = [1, 0.5, 0.0, 0.3], filter_threshold = 0.2.
            #   below threshold      -> [False, False, True, False]
            #   cast to dtype        -> [0, 0, 1, 0]
            #   cumulative sum       -> [0, 0, 1, 1]
            #   max(1 - cumsum, 0)   -> [1, 1, 0, 0]
            below = tf.cast(tf.abs(acf) < filter_threshold, dtype=dtype)
            below_so_far = tf.cumsum(below, axis=0)
            keep = tf.maximum(1. - below_so_far, 0.)
            acf *= keep

        # With R[k] := acf[k, ...],
        # ESS = N / {1 + 2 * Sum_{k=1}^N (N - k) / N * R[k]}
        # approximated by truncating the sum at the number of lags in acf.

        # Weight (N - k) / N, reshaped to [M, 1, ..., 1] so that it has the
        # same number of dimensions as acf.
        n = _axis_size(states, axis=0)
        lags = tf.range(0., _axis_size(acf, axis=0))
        lag_weights = (n - lags) / n
        static_ndims = acf.shape.ndims
        if static_ndims is None:
            weights_shape = tf.concat(
                ([-1], tf.ones([tf.rank(acf) - 1], dtype=tf.int32)), axis=0)
        else:
            weights_shape = [-1] + [1] * (static_ndims - 1)
        lag_weights = tf.reshape(lag_weights, weights_shape)

        # Lag 0 is skipped: it is accounted for by the leading 1.0.
        weighted_sum = tf.reduce_sum(
            lag_weights[1:, Ellipsis] * acf[1:, Ellipsis], axis=0)
        return n / (1.0 + 2 * weighted_sum)
tf.disable_v2_behavior() import input_data #Build the Training Set mnist = input_data.read_data_sets("/tmp/data/", one_hot=True) train_pixels, train_list_values = mnist.train.next_batch(100) test_pixels, test_list_of_values = mnist.test.next_batch(10) train_pixel_tensor = tf.placeholder("float", [None, 784]) test_pixel_tensor = tf.placeholder("float", [784]) #Cost Function and distance optimization distance = tf.reduce_sum(tf.abs( tf.add(train_pixel_tensor, tf.negative(test_pixel_tensor))), reduction_indices=1) pred = tf.arg_min(distance, 0) # Testing and algorithm evaluation accuracy = 0. init = tf.initialize_all_variables() with tf.Session() as sess: sess.run(init) for i in range(len(test_list_of_values)): nn_index = sess.run(pred, feed_dict={ train_pixel_tensor: train_pixels, test_pixel_tensor: test_pixels[i, :]
def tf_blend(X, y, type_to_idx, lr, steps, do_individual_scores=True):
    """
    Does linear combination of solutions, where the weights are positive and sum to 1.

    The weights are a softmax over one trainable logit per solution, trained
    with Adam to minimise the sum over types of log(mean absolute error)
    divided by the number of types.

    Args:
        X: array of shape (n_solutions, n_samples); it is transposed so that
            rows are samples and columns are solutions.
        y: targets, indexed as ``y[rows, None]`` (presumably a 1-D array of
            length n_samples -- TODO confirm).
        type_to_idx: mapping from a type to the sample indices of that type;
            used for the per-type cost and to stratify the split.
        lr: learning rate passed to ``tf.train.AdamOptimizer``.
        steps: number of passes over the training half.
        do_individual_scores: if true, also score every single solution.

    Returns:
        Tuple ``(ensemble_weights, scores)``. ``ensemble_weights`` is the
        softmax evaluated at the averaged logits. ``scores`` is an array whose
        first row is the per-type log cost of the ensemble on the test half,
        followed by one row per individual solution when
        ``do_individual_scores`` is set.
    """
    x = X.T
    n_samples, n_features = x.shape
    n_classes = len(type_to_idx)
    # One-hot type membership per sample, plus an integer label for stratifying.
    classes = np.zeros((n_samples, n_classes))
    strat = np.zeros(n_samples, dtype=int)

    # Stratify CV by type
    for i, (type_, idx) in enumerate(type_to_idx.items()):
        classes[idx, i] = 1
        strat[idx] = i

    # Initialize weights to zero to avoid bias
    logits_init = np.zeros((n_features, 1))

    # Reset graph
    tf.reset_default_graph()

    # Tensorflow placeholders and variables
    classes_tf = tf.placeholder(tf.float32, [None, n_classes])
    x_tf = tf.placeholder(tf.float32, [None, n_features])
    logits_tf = tf.Variable(logits_init, dtype=tf.float32)
    # Softmax over the solution axis keeps the weights positive and summing to 1.
    W = tf.nn.softmax(logits_tf, axis=0)
    y_pred = tf.matmul(x_tf, W)
    y_tf = tf.placeholder(tf.float32, [None, 1])
    abs_diff = tf.abs(y_tf - y_pred)
    # Broadcast against the one-hot matrix to get per-type absolute errors.
    class_diff = abs_diff * classes_tf
    cost = tf.reduce_sum(class_diff, axis=0)
    # The 1e-9 terms guard against division by zero and log(0).
    mean_cost = cost / (tf.reduce_sum(classes_tf, axis=0) + 1e-9)
    log_cost = tf.math.log(mean_cost + 1e-9) / n_classes
    total_cost = tf.reduce_sum(log_cost)
    test_size = 0.50
    train_step = tf.train.AdamOptimizer(lr).minimize(total_cost)

    # Keep track of progress
    scores = []
    running_weights = []
    running_logits = []

    # Get stratified train, test split
    train, test = sklearn.model_selection.train_test_split(
        np.arange(n_samples),
        stratify=strat,
        test_size=test_size,
        shuffle=True,
        random_state=42)
    # NOTE(review): true division makes batch_size a float, so the section
    # count passed to np.array_split below is a float too (about 10 in
    # practice) -- confirm this is intended rather than an integer batch size.
    batch_size = train.size / 10
    # Number of trailing logit snapshots that are averaged at the end.
    average_steps = 50 if steps > 100 else 20
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for i in range(steps):
            for batch in np.array_split(train, train.size // batch_size):
                feed = {
                    x_tf: x[batch],
                    y_tf: y[batch, None],
                    classes_tf: classes[batch]
                }
                sess.run(train_step, feed_dict=feed)
            # Reshuffle in place so the next pass sees different batches.
            np.random.shuffle(train)
            if i > 0:
                weights, ensemble_loss, logits = sess.run(
                    [W, log_cost, logits_tf],
                    feed_dict={
                        x_tf: x[test],
                        y_tf: y[test, None],
                        classes_tf: classes[test]
                    })
                running_logits.append(logits)
                running_weights.append(weights)

        # Average logits and get score and weights
        # NOTE(review): running_logits is empty when steps <= 1, in which
        # case there is nothing to average -- confirm callers pass steps >= 2.
        ensemble_logits = np.mean(running_logits[-average_steps:], 0)
        # Feeding logits_tf overrides the variable's value for this run only.
        ensemble_loss, ensemble_weights = sess.run(
            [log_cost, W],
            feed_dict={
                logits_tf: ensemble_logits,
                x_tf: x[test],
                y_tf: y[test, None],
                classes_tf: classes[test]
            })
        scores.append(ensemble_loss)

        # Do individual scores
        if do_individual_scores:
            test_weights = np.zeros((n_features, 1))
            # Calculate loss for individual solutions
            for i in range(n_features):
                # One-hot weight vector: put all the weight on solution i.
                test_weights[:] = 0
                test_weights[i] = 1
                test_loss = sess.run(log_cost,
                                     feed_dict={
                                         W: test_weights,
                                         x_tf: x[test],
                                         y_tf: y[test, None],
                                         classes_tf: classes[test]
                                     })
                scores.append(test_loss)
    return ensemble_weights, np.asarray(scores)
def sinc(x, threshold=1e-20):
    """Normalized zero phase sinc, sin(pi * x) / (pi * x), with its peak at zero.

    Inputs whose magnitude is below `threshold` are replaced by `threshold`
    before the division, so the quotient is never evaluated at exactly zero.
    """
    values = tf_float32(x)
    too_small = tf.abs(values) < threshold
    floor = threshold * tf.ones_like(values)
    safe_values = tf.where(too_small, floor, values)
    angle = np.pi * safe_values
    return tf.sin(angle) / angle
def build_model(self, is_training=True, inst_norm=False, no_target_source=False):
    """Build placeholders, the generator/discriminator graph, losses and summaries.

    Nothing is returned; the resulting nodes are grouped into InputHandle,
    LossHandle, EvalHandle and SummaryHandle objects and stored on `self` as
    `input_handle`, `loss_handle`, `eval_handle` and `summary_handle`.

    Args:
        is_training: forwarded to the generator, discriminator and encoder.
        inst_norm: forwarded to the generator.
        no_target_source: when true, extra loss terms are added for examples
            fed through the `no_target_*` placeholders (no L1 term for them).
    """
    real_data = tf.placeholder(tf.float32, [
        self.batch_size, self.input_width, self.input_width,
        self.input_filters + self.output_filters
    ],
                               name='real_A_and_B_images')
    embedding_ids = tf.placeholder(tf.int64, shape=None, name="embedding_ids")
    no_target_data = tf.placeholder(tf.float32, [
        self.batch_size, self.input_width, self.input_width,
        self.input_filters + self.output_filters
    ],
                                    name='no_target_A_and_B_images')
    no_target_ids = tf.placeholder(tf.int64,
                                   shape=None,
                                   name="no_target_embedding_ids")

    # target images
    real_B = real_data[:, :, :, :self.input_filters]
    # source images
    real_A = real_data[:, :, :,
                       self.input_filters:self.input_filters + self.output_filters]

    embedding = init_embedding(self.embedding_num, self.embedding_dim)
    fake_B, encoded_real_A = self.generator(real_A,
                                            embedding,
                                            embedding_ids,
                                            is_training=is_training,
                                            inst_norm=inst_norm)
    # The discriminator sees the source concatenated with a target on the
    # channel axis (axis 3).
    real_AB = tf.concat([real_A, real_B], 3)
    fake_AB = tf.concat([real_A, fake_B], 3)

    # Note it is not possible to set reuse flag back to False
    # initialize all variables before setting reuse to True
    real_D, real_D_logits, real_category_logits = self.discriminator(
        real_AB, is_training=is_training, reuse=False)
    fake_D, fake_D_logits, fake_category_logits = self.discriminator(
        fake_AB, is_training=is_training, reuse=True)

    # encoding constant loss
    # this loss assumes that the generated image and the real image
    # should reside in the same space and be close to each other
    encoded_fake_B = self.encoder(fake_B, is_training, reuse=True)[0]
    const_loss = (tf.reduce_mean(
        tf.square(encoded_real_A - encoded_fake_B))) * self.Lconst_penalty

    # category loss
    true_labels = tf.reshape(tf.one_hot(indices=embedding_ids,
                                        depth=self.embedding_num),
                             shape=[self.batch_size, self.embedding_num])
    real_category_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=real_category_logits, labels=true_labels))
    fake_category_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=fake_category_logits, labels=true_labels))
    # category_loss carries the penalty weight; fake_category_loss stays
    # unweighted and is scaled separately where g_loss is assembled.
    category_loss = self.Lcategory_penalty * (real_category_loss +
                                              fake_category_loss)

    # binary real/fake loss
    d_loss_real = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=real_D_logits, labels=tf.ones_like(real_D)))
    d_loss_fake = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=fake_D_logits, labels=tf.zeros_like(fake_D)))

    # L1 loss between real and generated images
    l1_loss = self.L1_penalty * tf.reduce_mean(tf.abs(fake_B - real_B))

    # total variation loss: l2 of the differences between neighbouring
    # pixels along axis 1 and along axis 2, each divided by the width
    width = self.output_width
    tv_loss = (
        tf.nn.l2_loss(fake_B[:, 1:, :, :] - fake_B[:, :width - 1, :, :]) /
        width +
        tf.nn.l2_loss(fake_B[:, :, 1:, :] - fake_B[:, :, :width - 1, :]) /
        width) * self.Ltv_penalty

    # maximize the chance generator fool the discriminator
    cheat_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(
            logits=fake_D_logits, labels=tf.ones_like(fake_D)))

    d_loss = d_loss_real + d_loss_fake + category_loss / 2.0
    g_loss = cheat_loss + l1_loss + self.Lcategory_penalty * fake_category_loss + const_loss + tv_loss

    if no_target_source:
        # no_target source are examples that don't have the corresponding target images
        # however, except L1 loss, we can compute category loss, binary loss and constant losses with those examples
        # it is useful when discriminator get saturated and d_loss drops to near zero
        # those data could be used as additional source of losses to break the saturation
        no_target_A = no_target_data[:, :, :,
                                     self.input_filters:self.input_filters +
                                     self.output_filters]
        no_target_B, encoded_no_target_A = self.generator(
            no_target_A,
            embedding,
            no_target_ids,
            is_training=is_training,
            inst_norm=inst_norm,
            reuse=True)
        no_target_labels = tf.reshape(
            tf.one_hot(indices=no_target_ids, depth=self.embedding_num),
            shape=[self.batch_size, self.embedding_num])
        no_target_AB = tf.concat([no_target_A, no_target_B], 3)
        no_target_D, no_target_D_logits, no_target_category_logits = self.discriminator(
            no_target_AB, is_training=is_training, reuse=True)
        encoded_no_target_B = self.encoder(no_target_B,
                                           is_training,
                                           reuse=True)[0]
        no_target_const_loss = tf.reduce_mean(
            tf.square(encoded_no_target_A -
                      encoded_no_target_B)) * self.Lconst_penalty
        no_target_category_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=no_target_category_logits,
                labels=no_target_labels)) * self.Lcategory_penalty

        d_loss_no_target = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=no_target_D_logits,
                labels=tf.zeros_like(no_target_D)))
        # cheat_loss now sums two terms; it is halved in g_loss below.
        cheat_loss += tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=no_target_D_logits,
                labels=tf.ones_like(no_target_D)))
        # Both totals are rebuilt from scratch, replacing the values above.
        d_loss = d_loss_real + d_loss_fake + d_loss_no_target + (
            category_loss + no_target_category_loss) / 3.0
        g_loss = cheat_loss / 2.0 + l1_loss + \
            (self.Lcategory_penalty * fake_category_loss + no_target_category_loss) / 2.0 + \
            (const_loss + no_target_const_loss) / 2.0 + tv_loss

    d_loss_real_summary = tf.summary.scalar("d_loss_real", d_loss_real)
    d_loss_fake_summary = tf.summary.scalar("d_loss_fake", d_loss_fake)
    category_loss_summary = tf.summary.scalar("category_loss", category_loss)
    cheat_loss_summary = tf.summary.scalar("cheat_loss", cheat_loss)
    l1_loss_summary = tf.summary.scalar("l1_loss", l1_loss)
    fake_category_loss_summary = tf.summary.scalar("fake_category_loss",
                                                   fake_category_loss)
    const_loss_summary = tf.summary.scalar("const_loss", const_loss)
    d_loss_summary = tf.summary.scalar("d_loss", d_loss)
    g_loss_summary = tf.summary.scalar("g_loss", g_loss)
    tv_loss_summary = tf.summary.scalar("tv_loss", tv_loss)

    d_merged_summary = tf.summary.merge([
        d_loss_real_summary, d_loss_fake_summary, category_loss_summary,
        d_loss_summary
    ])
    g_merged_summary = tf.summary.merge([
        cheat_loss_summary, l1_loss_summary, fake_category_loss_summary,
        const_loss_summary, g_loss_summary, tv_loss_summary
    ])

    # expose useful nodes in the graph as handles globally
    input_handle = InputHandle(real_data=real_data,
                               embedding_ids=embedding_ids,
                               no_target_data=no_target_data,
                               no_target_ids=no_target_ids)

    loss_handle = LossHandle(d_loss=d_loss,
                             g_loss=g_loss,
                             const_loss=const_loss,
                             l1_loss=l1_loss,
                             category_loss=category_loss,
                             cheat_loss=cheat_loss,
                             tv_loss=tv_loss)

    eval_handle = EvalHandle(encoder=encoded_real_A,
                             generator=fake_B,
                             target=real_B,
                             source=real_A,
                             embedding=embedding)

    summary_handle = SummaryHandle(d_merged=d_merged_summary,
                                   g_merged=g_merged_summary)

    # those operations will be shared, so we need
    # to make them visible globally
    setattr(self, "input_handle", input_handle)
    setattr(self, "loss_handle", loss_handle)
    setattr(self, "eval_handle", eval_handle)
    setattr(self, "summary_handle", summary_handle)
def embedding_to_padding(emb):
    """Input embeddings -> is_padding.

    A position counts as padding when the absolute values along its last
    axis sum to exactly zero. The last axis is kept with size 1, and the
    result is a float tensor holding 1.0 for padding and 0.0 otherwise.
    """
    abs_total = tf.reduce_sum(tf.abs(emb), axis=-1, keep_dims=True)
    is_all_zero = tf.equal(abs_total, 0.0)
    return tf.to_float(is_all_zero)
def _build_single_q_network(self, observations, head, state_t, state_tp1,
                            done_mask, reward_t, error_weight):
    """Builds the computational graph for a single Q network.

    Briefly, this part is calculating the following two quantities:
    1. q_value = q_fn(observations)
    2. td_error = q_fn(state_t) - reward_t - gamma * q_fn(state_tp1)
    The optimization target is to minimize the td_error.

    Args:
      observations: shape = [batch_size, hparams.fingerprint_length].
        The input of the Q function.
      head: shape = [1].
        The index of the head chosen for decision in bootstrap DQN.
      state_t: shape = [batch_size, hparams.fingerprint_length].
        The state at time step t.
      state_tp1: a list of tensors, with total number of batch_size,
        each has shape = [num_actions, hparams.fingerprint_length].
        Note that the num_actions can be different for each tensor.
        The state at time step t+1, tp1 is short for t plus 1.
      done_mask: shape = [batch_size, 1]
        Whether state_tp1 is the terminal state.
      reward_t: shape = [batch_size, 1]
        the reward at time step t.
      error_weight: shape = [batch_size, 1]
        weight for the loss.

    Returns:
      q_values: Tensor of [batch_size, 1]. The q values for the observations.
      td_error: Tensor of [batch_size, 1]. The TD error.
      weighted_error: Scalar tensor. The mean over the batch of the
        Huber-style loss of the TD error, weighted by error_weight.
      q_fn_vars: List of tf.Variables. The variables of q_fn when computing
        the q_values of state_t
      q_tp1_vars: List of tf.Variables. The variables of q_fn when computing
        the q_values of state_tp1
    """
    with tf.variable_scope('q_fn'):
        # q_value have shape [batch_size, 1].
        q_values = tf.gather(self.q_fn(observations), head, axis=-1)

    # calculating q_fn(state_t)
    # The Q network shares parameters with the action graph.
    with tf.variable_scope('q_fn', reuse=True):
        q_t = self.q_fn(state_t, reuse=True)
    q_fn_vars = tf.trainable_variables(scope=tf.get_variable_scope().name +
                                       '/q_fn')

    # calculating q_fn(state_tp1)
    # A separate 'q_tp1' scope holds a second set of variables.
    with tf.variable_scope('q_tp1', reuse=tf.AUTO_REUSE):
        q_tp1 = [self.q_fn(s_tp1, reuse=tf.AUTO_REUSE) for s_tp1 in state_tp1]
    q_tp1_vars = tf.trainable_variables(scope=tf.get_variable_scope().name +
                                        '/q_tp1')

    if self.double_q:
        with tf.variable_scope('q_fn', reuse=True):
            q_tp1_online = [self.q_fn(s_tp1, reuse=True) for s_tp1 in state_tp1]
        if self.num_bootstrap_heads:
            num_heads = self.num_bootstrap_heads
        else:
            num_heads = 1
        # determine the action to choose based on online Q estimator.
        # Each index row pairs the arg-max action with its head number.
        q_tp1_online_idx = [
            tf.stack(
                [tf.argmax(q, axis=0),
                 tf.range(num_heads, dtype=tf.int64)],
                axis=1) for q in q_tp1_online
        ]
        # use the index from max online q_values to compute the value
        # function
        v_tp1 = tf.stack(
            [tf.gather_nd(q, idx) for q, idx in zip(q_tp1, q_tp1_online_idx)],
            axis=0)
    else:
        v_tp1 = tf.stack([tf.reduce_max(q) for q in q_tp1], axis=0)

    # if s_{t+1} is the terminal state, we do not evaluate the Q value of
    # the state.
    q_tp1_masked = (1.0 - done_mask) * v_tp1

    q_t_target = reward_t + self.gamma * q_tp1_masked

    # stop gradient from flowing to the computing graph which computes
    # the Q value of s_{t+1}.
    # td_error has shape [batch_size, 1]
    td_error = q_t - tf.stop_gradient(q_t_target)

    # If use bootstrap, each head is trained with a different subset of the
    # training sample. Like the idea of dropout.
    if self.num_bootstrap_heads:
        head_mask = tf.keras.backend.random_binomial(
            shape=(1, self.num_bootstrap_heads), p=0.6)
        td_error = tf.reduce_mean(td_error * head_mask, axis=1)
    # The loss comes from a traditional trick in convex optimization:
    # http://web.stanford.edu/~boyd/cvxbook/.
    # See Chapter 6 pp. 298
    # It makes the optimization robust.
    # Specifically, the loss will use l1 instead of l2 loss when the td error
    # gets larger than 1.0. The l2 loss has the disadvantage that it has
    # the tendency to be dominated by outliers. In terms of estimation theory,
    # the asymptotic relative efficiency of the l1 loss estimator is better
    # for heavy-tailed distributions.
    errors = tf.where(
        tf.abs(td_error) < 1.0,
        tf.square(td_error) * 0.5, 1.0 * (tf.abs(td_error) - 0.5))
    weighted_error = tf.reduce_mean(error_weight * errors)
    return q_values, td_error, weighted_error, q_fn_vars, q_tp1_vars
def __init__(
        self,
        *,
        scope,
        ob_space,
        ac_space,
        stochpol_fn,
        nsteps,
        nepochs=4,
        nminibatches=1,
        gamma=0.99,
        gamma_ext=0.99,
        lam=0.95,
        ent_coef=0,
        cliprange=0.2,
        max_grad_norm=1.0,
        vf_coef=1.0,
        lr=30e-5,
        adam_hps=None,
        testing=False,
        comm=None,
        comm_train=None,
        use_news=False,
        update_ob_stats_every_step=True,
        int_coeff=None,
        ext_coeff=None,
        obs_save_flag=False,
):
    """Build the PPO loss and training op and initialise the agent's variables.

    All arguments are keyword-only.

    Args:
        scope: variable scope name under which the policy and optimizer are
            built; also used to select the variables to train and initialise.
        ob_space, ac_space: stored on the instance.
        stochpol_fn: zero-argument callable that returns the policy object.
        nsteps, nepochs, nminibatches, gamma, gamma_ext, lam, cliprange:
            stored on the instance.
        ent_coef: the entropy term of the loss is ``-ent_coef * entropy``.
        max_grad_norm: see the review note at the clipping call below.
        vf_coef: each value-function loss is scaled by ``0.5 * vf_coef``.
        lr, int_coeff, ext_coeff, use_news, update_ob_stats_every_step,
            obs_save_flag: stored on the instance.
        adam_hps: extra keyword arguments for MpiAdamOptimizer (default: none).
        testing: stored on the instance; rank 0 of a multi-process ``comm``
            may not be testing.
        comm: MPI communicator used for logging when it has more than one
            process; otherwise MPI.COMM_SELF is used.
        comm_train: MPI communicator for training; defaults to the logging
            communicator.

    Requires a default TensorFlow session, which is used to run the
    variable initializer.
    """
    self.lr = lr
    self.ext_coeff = ext_coeff
    self.int_coeff = int_coeff
    self.use_news = use_news
    self.update_ob_stats_every_step = update_ob_stats_every_step
    # Absolute scope name: enclosing scope (if any) joined with `scope`.
    self.abs_scope = (tf.get_variable_scope().name + '/' + scope).lstrip('/')
    self.testing = testing
    self.comm_log = MPI.COMM_SELF
    if comm is not None and comm.Get_size() > 1:
        self.comm_log = comm
        assert not testing or comm.Get_rank(
        ) != 0, "Worker number zero can't be testing"
    if comm_train is not None:
        self.comm_train, self.comm_train_size = comm_train, comm_train.Get_size(
        )
    else:
        self.comm_train, self.comm_train_size = self.comm_log, self.comm_log.Get_size(
        )
    self.is_log_leader = self.comm_log.Get_rank() == 0
    self.is_train_leader = self.comm_train.Get_rank() == 0
    self.obs_save_flag = obs_save_flag
    if self.is_log_leader:
        # 100 empty action/observation record slots, on the log leader only.
        self.obs_rec = [{'acs': [], 'obs': []} for i in range(100)]
    with tf.variable_scope(scope):
        self.best_ret = -np.inf
        self.local_best_ret = -np.inf
        self.rooms = []
        self.local_rooms = []
        self.scores = []
        self.ob_space = ob_space
        self.ac_space = ac_space
        self.stochpol = stochpol_fn()
        self.nepochs = nepochs
        self.cliprange = cliprange
        self.nsteps = nsteps
        self.nminibatches = nminibatches
        self.gamma = gamma
        self.gamma_ext = gamma_ext
        self.lam = lam
        self.adam_hps = adam_hps or dict()
        # Placeholders for advantages, intrinsic/extrinsic returns, old
        # negative log-probabilities and old value predictions.
        self.ph_adv = tf.placeholder(tf.float32, [None, None])
        self.ph_ret_int = tf.placeholder(tf.float32, [None, None])
        self.ph_ret_ext = tf.placeholder(tf.float32, [None, None])
        self.ph_oldnlp = tf.placeholder(tf.float32, [None, None])
        self.ph_oldvpred = tf.placeholder(tf.float32, [None, None])
        self.ph_lr = tf.placeholder(tf.float32, [])
        self.ph_lr_pred = tf.placeholder(tf.float32, [])
        self.ph_cliprange = tf.placeholder(tf.float32, [])

        #Define loss.
        neglogpac = self.stochpol.pd_opt.neglogp(self.stochpol.ph_ac)
        entropy = tf.reduce_mean(self.stochpol.pd_opt.entropy())
        vf_loss_int = (0.5 * vf_coef) * tf.reduce_mean(
            tf.square(self.stochpol.vpred_int_opt - self.ph_ret_int))
        vf_loss_ext = (0.5 * vf_coef) * tf.reduce_mean(
            tf.square(self.stochpol.vpred_ext_opt - self.ph_ret_ext))
        vf_loss = vf_loss_int + vf_loss_ext
        ratio = tf.exp(self.ph_oldnlp - neglogpac)  # p_new / p_old
        negadv = -self.ph_adv
        # Clipped surrogate: element-wise max of the unclipped and clipped
        # (negated-advantage) terms.
        pg_losses1 = negadv * ratio
        pg_losses2 = negadv * tf.clip_by_value(
            ratio, 1.0 - self.ph_cliprange, 1.0 + self.ph_cliprange)
        pg_loss = tf.reduce_mean(tf.maximum(pg_losses1, pg_losses2))
        ent_loss = (-ent_coef) * entropy
        approxkl = .5 * tf.reduce_mean(
            tf.square(neglogpac - self.ph_oldnlp))
        maxkl = .5 * tf.reduce_max(tf.square(neglogpac - self.ph_oldnlp))
        # Fraction of samples whose ratio lies outside the clip range.
        clipfrac = tf.reduce_mean(
            tf.to_float(tf.greater(tf.abs(ratio - 1.0), self.ph_cliprange)))
        loss = pg_loss + ent_loss + vf_loss + self.stochpol.aux_loss

        #Create optimizer.
        params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   scope=self.abs_scope)
        logger.info("PPO: using MpiAdamOptimizer connected to %i peers" %
                    self.comm_train_size)
        trainer = MpiAdamOptimizer(self.comm_train,
                                   learning_rate=self.ph_lr,
                                   **self.adam_hps)
        grads_and_vars = trainer.compute_gradients(loss, params)
        grads, vars = zip(*grads_and_vars)
        if max_grad_norm:
            # NOTE(review): the clipped gradients returned here are bound to
            # `_` and never used, so the `grads` applied below are unclipped
            # and max_grad_norm has no effect on training. Confirm whether
            # clipping was disabled on purpose before changing this.
            _, _grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
        global_grad_norm = tf.global_norm(grads)
        grads_and_vars = list(zip(grads, vars))
        self._train = trainer.apply_gradients(grads_and_vars)

    #Quantities for reporting.
    # self._losses and self.loss_names are parallel lists.
    self._losses = [
        loss, pg_loss, vf_loss, entropy, clipfrac, approxkl, maxkl,
        self.stochpol.aux_loss, self.stochpol.feat_var,
        self.stochpol.max_feat, global_grad_norm
    ]
    self.loss_names = [
        'tot', 'pg', 'vf', 'ent', 'clipfrac', 'approxkl', 'maxkl', "auxloss",
        "featvar", "maxfeat", "gradnorm"
    ]
    self.I = None
    self.disable_policy_update = None
    allvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                scope=self.abs_scope)
    if self.is_log_leader:
        tf_util.display_var_info(allvars)
    tf.get_default_session().run(tf.variables_initializer(allvars))
    sync_from_root(tf.get_default_session(),
                   allvars)  #Syncs initialization across mpi workers.
    self.t0 = time.time()
    self.global_tcount = 0
def decode(self, x):
    """Return sign(x) * |x| ** (1/8) / 128 as a float tensor."""
    x = tf.to_float(x)
    # we can't use tf.pow(..., 0.125) because of a high-error approximation
    # on TPU. Instead the eighth root is taken as three successive sqrt's.
    root = tf.abs(x)
    for _ in range(3):
        root = tf.sqrt(root)
    return tf.sign(x) * (root / 128.0)
def __call__(self, vocabs, moving_params=None):
    """Build the arc and relation scoring graph and return its tensors.

    The arc logits are the sum of a Gaussian log-density over the absolute
    distance |i - j| between positions (with mean and variance produced by
    bilinear layers) and an ordinary bilinear arc score.

    Args:
        vocabs: forwarded to the parent class's ``__call__``.
        moving_params: forwarded to the parent class. When it is not None the
            relation logits are selected with the predicted heads; otherwise
            the gold heads are used.

    Returns:
        Dict of tensors: logits, probabilities, predictions, targets,
        correctness masks and counts, the arc/rel/total losses, and the
        token/sequence counts taken from ``self``.
    """
    top_recur = super(GamaParser, self).__call__(vocabs,
                                                 moving_params=moving_params)
    int_tokens_to_keep = tf.to_int32(self.tokens_to_keep)

    with tf.variable_scope('MLP'):
        # One MLP output per role (dependent / head), each split into arc,
        # rel, mu and sigma parts along the last axis.
        dep_mlp, head_mlp = self.MLP(
            top_recur,
            self.arc_mlp_size + self.rel_mlp_size + 2 * self.p_mlp_size,
            n_splits=2)
        arc_dep_mlp, rel_dep_mlp, mu_dep_mlp, sigma_dep_mlp = tf.split(
            dep_mlp, [
                self.arc_mlp_size, self.rel_mlp_size, self.p_mlp_size,
                self.p_mlp_size
            ],
            axis=2)
        arc_head_mlp, rel_head_mlp, mu_head_mlp, sigma_head_mlp = tf.split(
            head_mlp, [
                self.arc_mlp_size, self.rel_mlp_size, self.p_mlp_size,
                self.p_mlp_size
            ],
            axis=2)

    with tf.variable_scope('dist'):
        with tf.variable_scope('mu'):
            # (n x b x d) o (d x 1 x d) o (n x b x d).T -> (n x b x b)
            # Squared, so the mean is non-negative.
            arc_mus = self.bilinear(mu_dep_mlp, mu_head_mlp, 1)**2
        with tf.variable_scope('sigma'):
            # (n x b x d) o (d x 1 x d) o (n x b x d).T -> (n x b x b)
            # Squared plus .1, so the variance is at least .1.
            arc_sigmas = self.bilinear(
                sigma_dep_mlp, sigma_head_mlp, 1, initializer=None)**2 + .1
        # (b x 1)
        i_mat = tf.expand_dims(tf.range(self.bucket_size), 1)
        # (1 x b)
        j_mat = tf.expand_dims(tf.range(self.bucket_size), 0)
        # (b x 1) - (1 x b) -> (b x b)
        k_mat = tf.to_float(tf.abs(i_mat - j_mat))
        # Gaussian log-density of the distance k under N(arc_mus, arc_sigmas).
        arc_logits = -.5 * tf.log(2 * np.pi * arc_sigmas) - .5 * (
            k_mat - arc_mus)**2 / arc_sigmas

    with tf.variable_scope('Arc'):
        # (n x b x d) o (d x 1 x d) o (n x b x d).T -> (n x b x b)
        arc_logits += self.bilinear(arc_dep_mlp,
                                    arc_head_mlp,
                                    1,
                                    add_bias2=False)
        # (n x b x b)
        arc_probs = tf.nn.softmax(arc_logits)
        # (n x b)
        arc_preds = tf.to_int32(tf.argmax(arc_logits, axis=-1))
        # (n x b)
        arc_targets = self.vocabs['heads'].placeholder
        # (n x b)
        arc_correct = tf.to_int32(tf.equal(
            arc_preds, arc_targets)) * int_tokens_to_keep
        # ()
        # tokens_to_keep is passed as the third positional argument.
        arc_loss = tf.losses.sparse_softmax_cross_entropy(
            arc_targets, arc_logits, self.tokens_to_keep)

    with tf.variable_scope('Rel'):
        # (n x b x d) o (d x r x d) o (n x b x d).T -> (n x b x r x b)
        rel_logits = self.bilinear(rel_dep_mlp, rel_head_mlp,
                                   len(self.vocabs['rels']))
        # (n x b x r x b)
        rel_probs = tf.nn.softmax(rel_logits, dim=2)
        # (n x b x b)
        one_hot = tf.one_hot(
            arc_preds if moving_params is not None else arc_targets,
            self.bucket_size)
        # (n x b x b) -> (n x b x b x 1)
        one_hot = tf.expand_dims(one_hot, axis=3)
        # (n x b x r x b) o (n x b x b x 1) -> (n x b x r x 1)
        # Picks, for every token, the relation scores of the selected head.
        select_rel_logits = tf.matmul(rel_logits, one_hot)
        # (n x b x r x 1) -> (n x b x r)
        select_rel_logits = tf.squeeze(select_rel_logits, axis=3)
        # (n x b)
        rel_preds = tf.to_int32(tf.argmax(select_rel_logits, axis=-1))
        # (n x b)
        rel_targets = self.vocabs['rels'].placeholder
        # (n x b)
        rel_correct = tf.to_int32(tf.equal(
            rel_preds, rel_targets)) * int_tokens_to_keep
        # ()
        rel_loss = tf.losses.sparse_softmax_cross_entropy(
            rel_targets, select_rel_logits, self.tokens_to_keep)

    n_arc_correct = tf.reduce_sum(arc_correct)
    n_rel_correct = tf.reduce_sum(rel_correct)
    # A token is correct only when both its head and its relation are.
    correct = arc_correct * rel_correct
    n_correct = tf.reduce_sum(correct)
    # A sequence is correct when its number of correct tokens equals
    # sequence_lengths - 1.
    n_seqs_correct = tf.reduce_sum(
        tf.to_int32(
            tf.equal(tf.reduce_sum(correct, axis=1),
                     self.sequence_lengths - 1)))
    loss = arc_loss + rel_loss

    outputs = {
        'arc_logits': arc_logits,
        'arc_mus': arc_mus,
        'arc_sigmas': arc_sigmas,
        'arc_probs': arc_probs,
        'arc_preds': arc_preds,
        'arc_targets': arc_targets,
        'arc_correct': arc_correct,
        'arc_loss': arc_loss,
        'n_arc_correct': n_arc_correct,
        'rel_logits': rel_logits,
        'rel_probs': rel_probs,
        'rel_preds': rel_preds,
        'rel_targets': rel_targets,
        'rel_correct': rel_correct,
        'rel_loss': rel_loss,
        'n_rel_correct': n_rel_correct,
        'n_tokens': self.n_tokens,
        'n_seqs': self.batch_size,
        'tokens_to_keep': self.tokens_to_keep,
        'n_correct': n_correct,
        'n_seqs_correct': n_seqs_correct,
        'loss': loss
    }

    return outputs
def apply_gradient_clipping(gradient):
    """Clamp the magnitude of every gradient element to [0.1, 1.0].

    The sign of each element is preserved. Non-zero elements smaller than
    0.1 in magnitude are raised to 0.1; exact zeros stay zero because
    sign(0) is 0.

    Args:
        gradient: gradient tensor, or None for a variable with no gradient.

    Returns:
        The clamped gradient tensor, or None when `gradient` is None.
    """
    if gradient is None:
        return None
    magnitude = tf.clip_by_value(tf.abs(gradient), 0.1, 1.)
    return tf.multiply(magnitude, tf.sign(gradient))
def start_interaction(self, env_fns, dynamics, nlump=2):
    """Build (or fetch) the training op, then create envs, rollout and buffers.

    Args:
        env_fns: sequence of environment constructors; it is sliced into
            `nlump` consecutive groups, each wrapped in one `VecEnv`.
        dynamics: forwarded unchanged to `Rollout`.
        nlump: number of environment groups ("lumps").
    """
    self.loss_names, self._losses = zip(*list(self.to_report.items()))
    self.global_step = tf.Variable(0, trainable=False)
    params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    # Optimizer selection. With more than one MPI process the MPI Adam
    # optimizer is always used; `self.optim` / `self.decay` are only
    # consulted in the single-process case.
    if MPI.COMM_WORLD.Get_size() > 1:
        if self.agent_num is None:
            trainer = MpiAdamOptimizer(learning_rate=self.ph_lr,
                                       comm=MPI.COMM_WORLD)
    else:
        if self.agent_num is None:
            if self.optim == 'adam':
                trainer = tf.train.AdamOptimizer(learning_rate=self.ph_lr)
            elif self.optim == 'sgd':
                print("using sgd")
                print("________________________")
                if self.decay:
                    # Learning rate is multiplied by 0.96 every 2500 global
                    # steps (staircase schedule).
                    self.decay_lr = tf.train.exponential_decay(
                        self.ph_lr,
                        self.global_step,
                        2500,
                        .96,
                        staircase=True)
                    trainer = tf.train.GradientDescentOptimizer(
                        learning_rate=self.decay_lr)
                else:
                    trainer = tf.train.GradientDescentOptimizer(
                        learning_rate=self.ph_lr)
            elif self.optim == 'momentum':
                print('using momentum')
                print('________________________')
                trainer = tf.train.MomentumOptimizer(
                    learning_rate=self.ph_lr, momentum=0.9)
    if self.agent_num is None:
        # Fresh agent: build gradients, optional summaries and the train op,
        # and publish the ops through graph collections.
        gradsandvars = trainer.compute_gradients(self.total_loss, params)
        l2_norm = lambda t: tf.sqrt(tf.reduce_sum(tf.pow(t, 2)))
        if self.log_grads:
            # NOTE(review): assumes every variable has a gradient;
            # compute_gradients can yield None for unused variables - confirm.
            for grad, var in gradsandvars:
                tf.summary.histogram(var.name + '/gradient', l2_norm(grad))
                tf.summary.histogram(var.name + '/value', l2_norm(var))
                grad_mean = tf.reduce_mean(tf.abs(grad))
                tf.summary.scalar(var.name + '/grad_mean', grad_mean)
            # NOTE(review): within this method `self.decay_lr` is only set
            # on the single-process 'sgd' path - confirm other optimizers
            # are never combined with `decay`.
            if self.decay:
                tf.summary.scalar('decay_lr', self.decay_lr)
            self._summary = tf.summary.merge_all()
            tf.add_to_collection("summary_op", self._summary)
        if self.grad_clip > 0:
            # Rescale all gradients jointly so their global norm does not
            # exceed `self.grad_clip`.
            grads, gradvars = zip(*gradsandvars)
            grads, _ = tf.clip_by_global_norm(grads, self.grad_clip)
            gradsandvars = list(zip(grads, gradvars))
        self._train = trainer.apply_gradients(gradsandvars,
                                              global_step=self.global_step)
        # Run the ops registered under UPDATE_OPS together with the
        # optimizer step.
        self._updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        self._train = tf.group(self._train, self._updates)
        tf.add_to_collection("train_op", self._train)
    else:
        # Agent with an `agent_num`: reuse the ops stored in the graph
        # collections instead of building new ones.
        self._train = tf.get_collection("train_op")[0]
        if self.log_grads:
            self._summary = tf.get_collection("summary_op")[0]
    # Rank 0 initializes all global variables; the values are then
    # broadcast from the root to the other processes.
    if MPI.COMM_WORLD.Get_rank() == 0:
        getsess().run(
            tf.variables_initializer(
                tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)))
    bcast_tf_vars_from_root(
        getsess(), tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES))
    self.all_visited_rooms = []
    self.all_scores = []
    self.nenvs = nenvs = len(env_fns)
    self.nlump = nlump
    # Integer division: if nenvs is not a multiple of nlump, the trailing
    # env_fns are not covered by any of the slices below.
    self.lump_stride = nenvs // self.nlump
    self.envs = [
        VecEnv(env_fns[l * self.lump_stride:(l + 1) * self.lump_stride],
               spaces=[self.env_ob_space, self.ac_space])
        for l in range(self.nlump)
    ]
    self.rollout = Rollout(ob_space=self.ob_space,
                           ac_space=self.ac_space,
                           nenvs=nenvs,
                           nsteps_per_seg=self.nsteps_per_seg,
                           nsegs_per_env=self.nsegs_per_env,
                           nlumps=self.nlump,
                           envs=self.envs,
                           policy=self.stochpol,
                           int_rew_coeff=self.int_coeff,
                           ext_rew_coeff=self.ext_coeff,
                           record_rollouts=self.use_recorder,
                           dynamics=dynamics,
                           exp_name=self.exp_name,
                           env_name=self.env_name,
                           video_log_freq=self.video_log_freq,
                           model_save_freq=self.model_save_freq,
                           use_apples=self.use_apples,
                           multi_envs=self.multi_envs,
                           lstm=self.lstm,
                           lstm1_size=self.lstm1_size,
                           lstm2_size=self.lstm2_size,
                           depth_pred=self.depth_pred,
                           early_stop=self.early_stop,
                           aux_input=self.aux_input)
    # Per-environment, per-step buffers for advantages and returns.
    self.buf_advs = np.zeros((nenvs, self.rollout.nsteps), np.float32)
    self.buf_rets = np.zeros((nenvs, self.rollout.nsteps), np.float32)
    if self.normrew:
        self.rff = RewardForwardFilter(self.gamma)
        self.rff_rms = RunningMeanStd()
    self.step_count = 0
    self.t_last_update = time.time()
    self.t_start = time.time()
def diff(a, b):
    """Return the mean absolute element-wise difference between `a` and `b`."""
    absolute_gap = tf.abs(a - b)
    return tf.reduce_mean(absolute_gap)
def __init__(self,
             *,
             hps,
             scope,
             ob_space,
             env_ob_space,
             ac_space,
             stochpol,
             ent_coef,
             gamma,
             lam,
             nepochs,
             lr,
             cliprange,
             nminibatches,
             normrew,
             normadv,
             use_news,
             ext_coeff,
             int_coeff,
             nsteps_per_seg,
             nsegs_per_env,
             dynamics,
             exp_name,
             env_name,
             video_log_freq,
             model_save_freq,
             use_apples,
             agent_num=None,
             restore_name=None,
             multi_envs=None,
             lstm=False,
             lstm1_size=512,
             lstm2_size=0,
             depth_pred=0,
             beta_d=.1,
             early_stop=0,
             aux_input=0,
             optim='adam',
             decay=0,
             grad_clip=0.0,
             log_grads=0,
             logdir='logs'):
    """Store hyperparameters and build the clipped-surrogate loss graph.

    All arguments are keyword-only. Most are stored verbatim on `self`
    (`ent_coef` is stored as `self.ent_coeff`). `hps` and `restore_name`
    are accepted but not read anywhere in this constructor.

    When `agent_num` is None the placeholders, the policy/value/entropy
    losses and `self.total_loss` are created under `scope` and registered
    in graph collections; otherwise `self.restore()` is called instead.
    """
    self.dynamics = dynamics
    self.exp_name = exp_name
    self.env_name = env_name
    self.video_log_freq = video_log_freq
    self.model_save_freq = model_save_freq
    self.use_apples = use_apples
    self.agent_num = agent_num
    self.multi_envs = multi_envs
    self.lstm = lstm
    self.lstm1_size = lstm1_size
    self.lstm2_size = lstm2_size
    self.depth_pred = depth_pred
    self.aux_input = aux_input
    self.early_stop = early_stop
    self.optim = optim
    self.decay = decay
    self.log_grads = log_grads
    self.grad_clip = grad_clip
    if log_grads:
        # Gradient summaries are written to <logdir>/grads/<exp_name>.
        self.grad_writer = tf.summary.FileWriter(logdir + '/grads/' +
                                                 exp_name)
    with tf.variable_scope(scope):
        self.use_recorder = True
        self.n_updates = 0
        self.scope = scope
        self.ob_space = ob_space
        self.env_ob_space = env_ob_space
        self.ac_space = ac_space
        self.stochpol = stochpol
        self.nepochs = nepochs
        self.lr = lr
        self.cliprange = cliprange
        self.nsteps_per_seg = nsteps_per_seg
        self.nsegs_per_env = nsegs_per_env
        self.nminibatches = nminibatches
        self.gamma = gamma
        self.lam = lam
        self.normrew = normrew
        self.normadv = normadv
        self.use_news = use_news
        self.ext_coeff = ext_coeff
        self.int_coeff = int_coeff
        self.ent_coeff = ent_coef
        self.beta_d = beta_d

        def mask(target, mask):
            # Gradient gate: the value is |mask - 1| * target + mask * target
            # (equal to `target` for a 0/1 mask), but only the `mask * target`
            # term lets gradients through.
            mask_h = tf.abs(mask - 1)
            return tf.stop_gradient(mask_h * target) + mask * target

        if self.agent_num is None:
            # Placeholders fed at update time.
            self.ph_adv = tf.placeholder(tf.float32, [None, None],
                                         name='adv')
            self.ph_ret = tf.placeholder(tf.float32, [None, None],
                                         name='ret')
            self.ph_rews = tf.placeholder(tf.float32, [None, None],
                                          name='rews')
            self.ph_oldnlp = tf.placeholder(tf.float32, [None, None],
                                            name='oldnlp')
            self.ph_oldvpred = tf.placeholder(tf.float32, [None, None],
                                              name='oldvpred')
            self.ph_lr = tf.placeholder(tf.float32, [], name='lr')
            self.ph_cliprange = tf.placeholder(tf.float32, [],
                                               name='cliprange')
            self.ph_gradmask = tf.placeholder(tf.float32, [None, None],
                                              name='gradmask')
            neglogpac = mask(self.stochpol.pd.neglogp(self.stochpol.ph_ac),
                             self.ph_gradmask)
            entropy = tf.reduce_mean(self.stochpol.pd.entropy(),
                                     name='agent_entropy')
            vpred = mask(self.stochpol.vpred, self.ph_gradmask)
            # Value loss: half the mean squared error against the returns.
            vf_loss = 0.5 * tf.reduce_mean(
                (vpred - mask(self.ph_ret, self.ph_gradmask))**2,
                name='vf_loss')
            ratio = tf.exp(self.ph_oldnlp - neglogpac,
                           name='ratio')  # p_new / p_old
            negadv = -mask(self.ph_adv, self.ph_gradmask)
            # Clipped surrogate objective: element-wise maximum of the
            # unclipped and the ratio-clipped negative-advantage terms.
            pg_losses1 = negadv * ratio
            pg_losses2 = negadv * tf.clip_by_value(ratio,
                                                   1.0 - self.ph_cliprange,
                                                   1.0 + self.ph_cliprange,
                                                   name='pglosses2')
            pg_loss_surr = tf.maximum(pg_losses1,
                                      pg_losses2,
                                      name='loss_surr')
            pg_loss = tf.reduce_mean(pg_loss_surr, name='pg_loss')
            # Negative coefficient: minimizing the loss maximizes entropy.
            ent_loss = (-ent_coef) * entropy
            if self.depth_pred:
                depth_loss = self.stochpol.depth_loss * beta_d
            # Diagnostics only; neither term is added to the total loss.
            approxkl = .5 * tf.reduce_mean(
                tf.square(neglogpac - self.ph_oldnlp), name='approxkl')
            clipfrac = tf.reduce_mean(
                tf.to_float(tf.abs(pg_losses2 - pg_loss_surr) > 1e-6),
                name='clipfrac')
            self.total_loss = pg_loss + ent_loss + vf_loss
            if self.depth_pred:
                self.total_loss = self.total_loss + depth_loss
                #self.total_loss = depth_loss
                #print("adding depth loss to total loss for optimization")
            #self.total_loss = depth_loss
            # Values reported during training; note that 'ent' is the raw
            # entropy, not `ent_loss`.
            self.to_report = {
                'tot': self.total_loss,
                'pg': pg_loss,
                'vf': vf_loss,
                'ent': entropy,
                'approxkl': approxkl,
                'clipfrac': clipfrac
            }
            if self.depth_pred:
                self.to_report.update({'depth_loss': depth_loss})
            # Register placeholders and loss tensors in named graph
            # collections so they can be looked up by name later.
            tf.add_to_collection('adv', self.ph_adv)
            tf.add_to_collection('ret', self.ph_ret)
            tf.add_to_collection('rews', self.ph_rews)
            tf.add_to_collection('oldnlp', self.ph_oldnlp)
            tf.add_to_collection('oldvpred', self.ph_oldvpred)
            tf.add_to_collection('lr', self.ph_lr)
            tf.add_to_collection('cliprange', self.ph_cliprange)
            tf.add_to_collection('agent_entropy', entropy)
            tf.add_to_collection('vf_loss', vf_loss)
            tf.add_to_collection('ratio', ratio)
            tf.add_to_collection('pg_losses2', pg_losses2)
            tf.add_to_collection('loss_surr', pg_loss_surr)
            tf.add_to_collection('pg_loss', pg_loss)
            if self.depth_pred:
                tf.add_to_collection('depth_loss', depth_loss)
            tf.add_to_collection('approxkl', approxkl)
            tf.add_to_collection('clipfrac', clipfrac)
        else:
            self.restore()
def model_creation(neurons, nb_features, nb_targets):
    """Build a fully connected sigmoid network together with its training ops.

    Args:
        neurons: sequence giving the number of units of each hidden layer;
            must contain at least one entry.
        nb_features: number of input features (second dimension of X).
        nb_targets: number of targets (second dimension of Y).

    Returns:
        Tuple `(X, Y, sess, opt, mse, layers_dict)`: the input and target
        placeholders, the interactive session (global variables already
        initialized), the Adam minimize op, the cost tensor, and a dict
        holding every weight, bias and layer output under the keys
        `weight_hidden_<i>`, `bias_hidden_<i>`, `hidden_layer_<i>`,
        `weight_out`, `bias_out` and `output_layer`.

    Raises:
        ValueError: if `neurons` is empty.
    """
    # Validate before touching TensorFlow: the original created the
    # InteractiveSession (which installs itself as the default session) and
    # the placeholders first, leaking them when the check failed.
    if len(neurons) < 1:
        raise ValueError("You must have at least one hidden layer")

    # Session
    sess = tf.InteractiveSession()

    # Placeholders
    X = tf.placeholder(tf.float32, shape=[None, nb_features])
    Y = tf.placeholder(tf.float32, shape=[None, nb_targets])

    weight_initializer = tf.variance_scaling_initializer(
        mode="fan_avg", distribution="uniform", scale=1)
    bias_initializer = tf.zeros_initializer()
    layers_dict = {}

    # Hidden weights and biases: layer i maps input_sizes[i] -> neurons[i].
    input_sizes = [nb_features] + list(neurons[:-1])
    for idx, (n_in, n_out) in enumerate(zip(input_sizes, neurons)):
        layers_dict["weight_hidden_" + str(idx)] = tf.Variable(
            weight_initializer([n_in, n_out]))
        layers_dict["bias_hidden_" + str(idx)] = tf.Variable(
            bias_initializer([n_out]))

    # Output weights and bias
    layers_dict["weight_out"] = tf.Variable(
        weight_initializer([neurons[-1], nb_targets]))
    layers_dict["bias_out"] = tf.Variable(bias_initializer([nb_targets]))

    # Hidden layers: each one is sigmoid(previous @ W + b).
    previous = X
    for idx in range(len(neurons)):
        previous = tf.sigmoid(
            tf.add(tf.matmul(previous,
                             layers_dict["weight_hidden_" + str(idx)]),
                   layers_dict["bias_hidden_" + str(idx)]))
        layers_dict["hidden_layer_" + str(idx)] = previous

    # Output layer: absolute value of the transposed affine output.
    # NOTE(review): because of the transpose the output is
    # [nb_targets, batch] while Y is [batch, nb_targets]; the cost below
    # relies on broadcasting between the two - confirm this is intended.
    layers_dict["output_layer"] = tf.abs(tf.transpose(
        tf.add(tf.matmul(previous, layers_dict["weight_out"]),
               layers_dict["bias_out"])),
                                         name="output_layer")

    # Cost function: square root of the mean squared difference (i.e. an
    # RMSE, although the variable keeps its historical name `mse`).
    mse = tf.sqrt(
        tf.reduce_mean(tf.squared_difference(layers_dict["output_layer"], Y)))

    # Optimizer
    opt = tf.train.AdamOptimizer(0.001).minimize(mse)

    # Init
    sess.run(tf.global_variables_initializer())
    return ((X, Y, sess, opt, mse, layers_dict))
def sym_exp_sigmoid(x, width=8.0):
    """Symmetrical version of exp_sigmoid centered at (0, 1e-7).

    The input is converted with `tf_float32`, its absolute value is mapped
    through `width * (|x| / 2 - 1)` and the result is passed to
    `exp_sigmoid`, so `x` and `-x` give the same output.
    """
    magnitude = tf.abs(tf_float32(x))
    shifted = magnitude / 2.0 - 1.0
    return exp_sigmoid(width * shifted)
def rgbd_consistency_loss(frame1transformed_depth,
                          frame1rgb,
                          frame2depth,
                          frame2rgb,
                          validity_mask=None):
    """Computes a loss that penalizes RGBD inconsistencies between frames.

    This function computes 3 losses that penalize inconsistencies between two
    frames: depth, RGB, and structural similarity. It IS NOT SYMMETRIC with
    respect to both frames. In particular, to address occlusions, it only
    penalizes depth and RGB inconsistencies at pixels where frame1 is closer
    to the camera than frame2 (Why? see https://arxiv.org/abs/1904.04998).
    Therefore the intended usage pattern is running it twice - second time
    with the two frames swapped.

    Args:
      frame1transformed_depth: A transform_depth_map.TransformedDepthMap
        object representing the depth map of frame 1 after it was
        motion-transformed to frame 2, a motion transform that accounts for
        all camera and object motion that occurred between frame1 and frame2.
        The tensors inside frame1transformed_depth are of shape [B, H, W].
      frame1rgb: A tf.Tensor of shape [B, H, W, C] containing the RGB image at
        frame1.
      frame2depth: A tf.Tensor of shape [B, H, W] containing the depth map at
        frame2.
      frame2rgb: A tf.Tensor of shape [B, H, W, C] containing the RGB image at
        frame2.
      validity_mask: a tf.Tensor of a floating point type and a shape of
        [B, H, W, 1] containing a validity mask.

    Returns:
      A dictionary from string to tf.Tensor, with the following entries:
        depth_error: A tf scalar, the depth mismatch error between the two
          frames.
        rgb_error: A tf scalar, the rgb mismatch error between the two frames.
        ssim_error: A tf scalar, the structural similarity mismatch error
          between the two frames.
        depth_proximity_weight: A tf.Tensor of shape [B, H, W], representing a
          function that peaks (at 1.0) for pixels where there is depth
          consistency between the two frames, and is small otherwise.
        frame1_closer_to_camera: A tf.Tensor built from
          frame1transformed_depth.mask and the depth comparison (so of their
          shape, [B, H, W] per the Args above), a mask that is 1.0 where the
          depth map of frame 1 has smaller depth than frame 2.
    """
    # Stack RGB and depth of frame 2 along the channel axis so that a single
    # resampling call warps both; they are split apart again right after
    # (3 RGB channels + 1 depth channel).
    frame2rgbd = tf.concat(
        [frame2rgb, tf.expand_dims((frame2depth), -1)], axis=-1)
    frame2rgbd_resampled = resampler.resampler_with_unstacked_warp(
        frame2rgbd,
        frame1transformed_depth.pixel_x,
        frame1transformed_depth.pixel_y,
        safe=False)
    frame2rgb_resampled, frame2depth_resampled = tf.split(
        frame2rgbd_resampled, [3, 1], axis=-1)
    frame2depth_resampled = tf.squeeze(frame2depth_resampled, axis=-1)

    # f1td.depth is the predicted depth at [pixel_y, pixel_x] for frame2. Now
    # we generate (by interpolation) the actual depth values for frame2's
    # depth, at the same locations, so that we can compare the two depths.

    # We penalize inconsistencies between the two frames' depth maps only if
    # the transformed depth map (of frame 1) falls closer to the camera than
    # the actual depth map (of frame 2). This is intended for avoiding
    # penalizing points that become occluded because of the transform.
    # So what about depth inconsistencies where frame1's depth map is FARTHER
    # from the camera than frame2's? These will be handled when we swap the
    # roles of frame 1 and 2 (more in https://arxiv.org/abs/1904.04998).
    frame1_closer_to_camera = tf.to_float(
        tf.logical_and(
            frame1transformed_depth.mask,
            tf.less(frame1transformed_depth.depth, frame2depth_resampled)))
    frames_l1_diff = tf.abs(frame2depth_resampled -
                            frame1transformed_depth.depth)
    if validity_mask is not None:
        frames_l1_diff = frames_l1_diff * tf.squeeze(validity_mask, axis=[3])
    # multiply_no_nan yields 0 wherever the mask is 0, even if the masked
    # value is NaN or infinite.
    depth_error = tf.reduce_mean(
        tf.math.multiply_no_nan(frames_l1_diff, frame1_closer_to_camera))

    frames_rgb_l1_diff = tf.abs(frame2rgb_resampled - frame1rgb)
    if validity_mask is not None:
        frames_rgb_l1_diff = frames_rgb_l1_diff * validity_mask
    rgb_error = tf.math.multiply_no_nan(
        frames_rgb_l1_diff, tf.expand_dims(frame1_closer_to_camera, -1))
    rgb_error = tf.reduce_mean(rgb_error)

    # We generate a weight function that peaks (at 1.0) for pixels where the
    # depth difference is less than its standard deviation across the frame,
    # and falls off to zero otherwise. This function is used later for
    # weighing the structural similarity loss term. We only want to demand
    # structural similarity for surfaces that are close to one another in the
    # two frames.
    # The 1e-4 keeps the denominator below strictly positive.
    depth_error_second_moment = _weighted_average(
        tf.square(frame2depth_resampled - frame1transformed_depth.depth),
        frame1_closer_to_camera) + 1e-4
    depth_proximity_weight = tf.math.multiply_no_nan(
        depth_error_second_moment /
        (tf.square(frame2depth_resampled - frame1transformed_depth.depth) +
         depth_error_second_moment),
        tf.to_float(frame1transformed_depth.mask))
    if validity_mask is not None:
        depth_proximity_weight = depth_proximity_weight * tf.squeeze(
            validity_mask, axis=[3])

    # If we don't stop the gradient training won't start. The reason is
    # presumably that then the network can push the depths apart instead of
    # seeking RGB consistency.
    depth_proximity_weight = tf.stop_gradient(depth_proximity_weight)

    # These values of c1 and c2 seemed to work better than defaults.
    # TODO(gariel): Make them parameters rather than hard coded.
    ssim_error, avg_weight = weighted_ssim(
        frame2rgb_resampled,
        frame1rgb,
        depth_proximity_weight,
        c1=float('inf'),
        c2=9e-6)
    ssim_error_mean = tf.reduce_mean(
        tf.math.multiply_no_nan(ssim_error, avg_weight))

    endpoints = {
        'depth_error': depth_error,
        'rgb_error': rgb_error,
        'ssim_error': ssim_error_mean,
        'depth_proximity_weight': depth_proximity_weight,
        'frame1_closer_to_camera': frame1_closer_to_camera
    }
    return endpoints
def soft_relu(x):
    """Compute log(1 + exp(x)).

    Evaluated as log(1 + exp(-|x|)) + max(x, 0), so the exponential is only
    ever taken of a non-positive argument.
    """
    # Useful identities:
    #   log(sigmoid(x)) = x - soft_relu(x) = - soft_relu(-x)
    #   log(1 - sigmoid(x)) = - soft_relu(x)
    decay_term = tf.log(1.0 + tf.exp(-tf.abs(x)))
    linear_term = tf.maximum(x, 0.0)
    return decay_term + linear_term
def _finish(self, state):
    """Build the grouped update op for all variables.

    For every variable the per-variable slots (running sum of squared
    gradients, largest gradient norm seen, gradient estimate, previous
    iterate) are updated and the variable is moved by
    `-eta * gradient_estimate`.

    Args:
        state: object providing `get_slot(var, name)` for the slot
            variables; it is also passed to `self._recompute_gradients`.

    Returns:
        A single op grouping all slot and variable updates.
    """
    update_ops = []
    # NOTE(review): judging by the names these are the current-batch
    # gradients evaluated at the previous iterate - confirm against
    # `_recompute_gradients`.
    grads_at_prev_iterate = self._recompute_gradients(state)
    for var, grad, grad_at_prev_iterate in zip(self.vars, self.grads,
                                               grads_at_prev_iterate):
        sum_grad_squared = state.get_slot(var, SUM_GRAD_SQUARED)
        previous_iterate = state.get_slot(var, PREVIOUS_ITERATE)
        maximum_gradient = state.get_slot(var, MAXIMUM_GRADIENT)
        sum_estimates_squared = state.get_slot(var, SUM_ESTIMATES_SQUARED)
        # Track the largest gradient norm seen so far; it bounds the
        # gradient estimate below.
        maximum_gradient_updated = tf.assign(
            maximum_gradient, tf.maximum(maximum_gradient, tf.norm(grad)))
        update_ops.append(maximum_gradient_updated)
        sum_grad_squared_updated = tf.assign_add(sum_grad_squared,
                                                 tf.pow(tf.abs(grad), 2.0))
        update_ops.append(sum_grad_squared_updated)
        # Ratio of gradient change to iterate change; only used for the
        # summaries. The 0.0001 avoids division by zero.
        smoothness = tf.norm(grad - grad_at_prev_iterate) / (
            0.0001 + tf.norm(var - previous_iterate))
        # Step size shrinks with the accumulated squared gradients
        # (power -1/3); the momentum weight beta is capped at 1.
        eta = self.lr * tf.pow(self.eta + sum_grad_squared_updated,
                               -1.0 / 3.0)
        beta = tf.minimum(1.0, self.momentum * tf.square(eta))
        grad_estimate = state.get_slot(var, GRAD_ESTIMATE)
        # Recursive estimate: the new gradient plus (1 - beta) times the
        # correction (old estimate - gradient at the previous iterate).
        new_grad_estimate = grad + (1.0 - beta) * (
            grad_estimate - grad_at_prev_iterate)
        new_grad_estimate = tf.clip_by_value(new_grad_estimate,
                                             -maximum_gradient_updated,
                                             maximum_gradient_updated)
        if self.output_summaries:
            tf.summary.scalar(self._name + "/smoothness/" + var.name,
                              smoothness)
            tf.summary.scalar(self._name + "/max_grad/" + var.name,
                              maximum_gradient_updated)
            tf.summary.scalar(self._name + "/average_beta/" + var.name,
                              tf.reduce_mean(beta))
            tf.summary.scalar(self._name + "/iterate_diff/" + var.name,
                              tf.norm(var - previous_iterate))
            tf.summary.scalar(self._name + "/grad_diff/" + var.name,
                              tf.norm(grad - grad_at_prev_iterate))
            tf.summary.scalar(
                self._name + "/vr_grad_estimate_norm/" + var.name,
                tf.norm(new_grad_estimate))
            tf.summary.scalar(self._name + "/grad_norm/" + var.name,
                              tf.norm(grad))
        grad_estimate_updated = tf.assign(grad_estimate, new_grad_estimate)
        update_ops.append(grad_estimate_updated)
        sum_estimates_squared_updated = tf.assign_add(
            sum_estimates_squared, tf.square(new_grad_estimate))
        update_ops.append(sum_estimates_squared_updated)
        # Ordering matters: the previous iterate may only be overwritten
        # after the gradient at the old previous iterate was computed, and
        # the variable may only move after its current value was saved.
        with tf.control_dependencies([grad_at_prev_iterate]):
            previous_iterate_updated = tf.assign(previous_iterate, var)
        update_ops.append(previous_iterate_updated)
        step = -eta * grad_estimate_updated
        with tf.control_dependencies([previous_iterate_updated]):
            var_updated = tf.assign_add(var, step)
        update_ops.append(var_updated)
    return tf.group(*update_ops)
def advantage_activation_sqrt(x):
    """Apply a sign-preserving square-root squashing to `x`.

    Computes sign(x) * (sqrt(|x| + offset**2) - offset) with offset = 0.01,
    which maps 0 to 0.
    """
    offset = 0.01
    magnitude = tf.sqrt(tf.abs(x) + offset**2) - offset
    return tf.sign(x) * magnitude
def next_timestep(self, state, action):
    '''Calculate the next state and reward after one timestep

    Size of tensors' first dimension is the batch size for parallel
    computation. The model parameters are read from `self.boat.physics`.

    Params
    ======
        state: rank-2 tensor; columns 0:3 are read as `eta` and columns 3:6
            as `upsilon` (presumably pose [x, y, heading] and body-frame
            velocities - confirm with the caller)
        action: rank-2 tensor; column 0 is read as `Tport` and column 1 as
            `Tstbd` (presumably port / starboard thrust - confirm)

    Returns
    ======
        tuple (next_state, reward): next_state is a rank-2 tensor formed by
        concatenating the updated eta and upsilon along axis 1; reward is
        `self.get_reward(next_state)`
    '''
    eta = state[:, 0:3]
    upsilon = state[:, 3:6]
    Tport = action[:, 0]
    Tstbd = action[:, 1]
    zeros = tf.zeros([self.batch_size], dtype=tf.float32)
    ones = tf.ones([self.batch_size], dtype=tf.float32)
    # Damping coefficients switch between two constant values depending on
    # whether |upsilon[:, 0]| is below 1.2.
    Xu = tf.where(
        tf.less(tf.abs(upsilon[:, 0]), 1.2),
        tf.constant(-0.25, dtype=tf.float32, shape=[self.batch_size]),
        tf.constant(64.55, dtype=tf.float32, shape=[self.batch_size]))
    Xuu = tf.where(
        tf.less(tf.abs(upsilon[:, 0]), 1.2),
        tf.constant(0.0, dtype=tf.float32, shape=[self.batch_size]),
        tf.constant(-70.92, dtype=tf.float32, shape=[self.batch_size]))
    # Remaining damping terms; Yr, Nv and Nr scale with the norm of the
    # first two velocity components.
    Yv = 0.5*(-40*1000*tf.abs(upsilon[:, 1])) * \
        (1.1+0.0045*(1.01/0.09) - 0.1*(0.27/0.09)+0.016*(tf.pow((0.27/0.09), 2)))
    Yr = 6*(-3.141592*1000) * \
        tf.sqrt(tf.pow(upsilon[:, 0], 2)+tf.pow(upsilon[:, 1], 2))*0.09*0.09*1.01
    Nv = 0.06*(-3.141592*1000) * \
        tf.sqrt(tf.pow(upsilon[:, 0], 2)+tf.pow(upsilon[:, 1], 2))*0.09*0.09*1.01
    Nr = 0.02*(-3.141592*1000) * \
        tf.sqrt(tf.pow(upsilon[:, 0], 2)+tf.pow(upsilon[:, 1], 2))*0.09*0.09*1.01*1.01
    # Constant 3x3 matrix shared by the whole batch; inverted below.
    M = tf.constant(
        [[self.boat.physics.m - self.boat.physics.X_u_dot, 0, 0],
         [
             0, self.boat.physics.m - self.boat.physics.Y_v_dot,
             0 - self.boat.physics.Y_r_dot
         ],
         [
             0, 0 - self.boat.physics.N_v_dot,
             self.boat.physics.Iz - self.boat.physics.N_r_dot
         ]])
    # Input vector built from the two action columns, reshaped to
    # [batch, 3, 1] for the batched matmul below.
    T = tf.stack([
        Tport + self.boat.physics.c * Tstbd, zeros,
        0.5 * self.boat.physics.B * (Tport - self.boat.physics.c * Tstbd)
    ],
                 axis=1)
    T = tf.reshape(T, [self.batch_size, 3, 1])
    # Each 3x3 nested list of [batch] tensors stacks to [3, 3, batch]; the
    # transpose moves the batch axis first, giving [batch, 3, 3].
    CRB = tf.stack([[zeros, zeros, -self.boat.physics.m * upsilon[:, 1]],
                    [zeros, zeros, self.boat.physics.m * upsilon[:, 0]],
                    [
                        self.boat.physics.m * upsilon[:, 1],
                        -self.boat.physics.m * upsilon[:, 0], zeros
                    ]])
    CRB = tf.transpose(CRB, perm=[2, 0, 1])
    CA = tf.stack(
        [[
            zeros, zeros,
            2 * ((self.boat.physics.Y_v_dot * upsilon[:, 1]) +
                 ((self.boat.physics.Y_r_dot + self.boat.physics.N_v_dot) / 2)
                 * upsilon[:, 2])
        ],
         [
             zeros, zeros,
             -self.boat.physics.X_u_dot * self.boat.physics.m * upsilon[:, 0]
         ],
         [
             2 * (((-self.boat.physics.Y_v_dot) * upsilon[:, 1]) -
                  ((self.boat.physics.Y_r_dot + self.boat.physics.N_v_dot) / 2)
                  * upsilon[:, 2]),
             self.boat.physics.X_u_dot * self.boat.physics.m * upsilon[:, 0],
             zeros
         ]])
    CA = tf.transpose(CA, perm=[2, 0, 1])
    C = CRB + CA
    Dl = tf.stack([[-Xu, zeros, zeros], [zeros, -Yv, -Yr],
                   [zeros, -Nv, -Nr]])
    Dl = tf.transpose(Dl, perm=[2, 0, 1])
    # The first entry uses the builtin abs(), the others tf.abs().
    Dn = tf.stack([[Xuu * abs(upsilon[:, 0]), zeros, zeros],
                   [
                       zeros,
                       self.boat.physics.Yvv * tf.abs(upsilon[:, 1]) +
                       self.boat.physics.Yvr * tf.abs(upsilon[:, 2]),
                       self.boat.physics.Yrv * tf.abs(upsilon[:, 1]) +
                       self.boat.physics.Yrr * tf.abs(upsilon[:, 2])
                   ],
                   [
                       zeros,
                       self.boat.physics.Nvv * tf.abs(upsilon[:, 1]) +
                       self.boat.physics.Nvr * tf.abs(upsilon[:, 2]),
                       self.boat.physics.Nrv * tf.abs(upsilon[:, 1]) +
                       self.boat.physics.Nrr * tf.abs(upsilon[:, 2])
                   ]])
    Dn = tf.transpose(Dn, perm=[2, 0, 1])
    D = Dl - Dn
    upsilon = tf.reshape(upsilon, [self.batch_size, 3, 1])
    # upsilon_dot = M^-1 (T - C upsilon - D upsilon)
    upsilon_dot = tf.matmul(
        tf.linalg.inv(M),
        (T - tf.matmul(C, upsilon) - tf.matmul(D, upsilon)))
    # One explicit integration step of size self.train_dt.
    upsilon = (self.train_dt) * upsilon_dot + upsilon  # integral
    # Rotation about the third axis by the angle stored in eta[:, 2].
    J = tf.stack([[tf.cos(eta[:, 2]), -tf.sin(eta[:, 2]), zeros],
                  [tf.sin(eta[:, 2]),
                   tf.cos(eta[:, 2]), zeros], [zeros, zeros, ones]])
    J = tf.transpose(J, perm=[2, 0, 1])
    # Rotate the already-updated velocities to obtain the rate of eta.
    eta_dot = tf.matmul(
        J, upsilon)  # transformation into local reference frame
    eta = tf.reshape(eta, [self.batch_size, 3, 1])
    eta = (self.train_dt) * eta_dot + eta  # integral
    # Angle wrapping is disabled, so eta[:, 2] is not kept within [-pi, pi]:
    # eta[:, 2] = tf.where(tf.greater(tf.abs(eta[:, 2]), np.pi))
    # eta[2] = (self.eta[2]/abs(self.eta[2]))*(abs(self.eta[2])-2*np.pi)
    eta = tf.reshape(eta, [self.batch_size, 3])
    upsilon = tf.reshape(upsilon, [self.batch_size, 3])
    next_state = tf.concat([eta, upsilon], axis=1)
    reward = self.get_reward(next_state)
    return next_state, reward
def main(unused_argv):
    """Evaluate model checkpoints and report averaged metrics.

    Builds the evaluation graph from the config named by `FLAGS.model_cfg`.
    If `FLAGS.ckpt_fp` is None, polls `FLAGS.train_dir` forever and, for each
    new latest checkpoint, writes summaries to `FLAGS.eval_dir` and saves a
    copy of the checkpoint there; otherwise evaluates the single checkpoint
    `FLAGS.ckpt_fp` and prints the mean of every metric.
    """
    if not tf.gfile.IsDirectory(FLAGS.eval_dir):
        tf.gfile.MakeDirs(FLAGS.eval_dir)

    cfg, _ = get_named_config(FLAGS.model_cfg, FLAGS.model_cfg_overrides)

    # Load data
    with tf.name_scope("loader"):
        feat_dict = load_noteseqs(
            FLAGS.dataset_fp,
            cfg.eval_batch_size,
            cfg.eval_seq_len,
            max_discrete_times=cfg.data_max_discrete_times,
            max_discrete_velocities=cfg.data_max_discrete_velocities,
            augment_stretch_bounds=None,
            augment_transpose_bounds=None,
            randomize_chord_order=cfg.data_randomize_chord_order,
            repeat=False)

    # Build model
    with tf.variable_scope("phero_model"):
        model_dict = build_genie_model(feat_dict,
                                       cfg,
                                       cfg.eval_batch_size,
                                       cfg.eval_seq_len,
                                       is_training=False)
    genie_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                   scope="phero_model")

    # Build gold model: a second copy of the model that shares variables
    # (reuse=True) and is fed one placeholder sequence at a time.
    eval_gold = False
    if cfg.stp_emb_vq or cfg.stp_emb_iq:
        eval_gold = True
        with tf.variable_scope("phero_model", reuse=True):
            gold_feat_dict = {
                "midi_pitches": tf.placeholder(tf.int32, [1, None]),
                "velocities": tf.placeholder(tf.int32, [1, None]),
                "delta_times_int": tf.placeholder(tf.int32, [1, None])
            }
            gold_seq_maxlen = gold.gold_longest()
            gold_seq_varlens = tf.placeholder(tf.int32, [1])
            gold_buttons = tf.placeholder(tf.int32, [1, None])
            gold_model_dict = build_genie_model(gold_feat_dict,
                                                cfg,
                                                1,
                                                gold_seq_maxlen,
                                                is_training=False,
                                                seq_varlens=gold_seq_varlens)

        gold_encodings = gold_model_dict["stp_emb_vq_discrete" if cfg.
                                         stp_emb_vq else "stp_emb_iq_discrete"]
        gold_mask = tf.sequence_mask(gold_seq_varlens,
                                     maxlen=gold_seq_maxlen,
                                     dtype=tf.float32)
        gold_diff = tf.cast(gold_buttons, tf.float32) - tf.cast(
            gold_encodings, tf.float32)
        gold_diff_l2 = tf.square(gold_diff)
        gold_diff_l1 = tf.abs(gold_diff)

        # Mean over the positions where the sequence mask is 1.
        weighted_avg = lambda t, m: tf.reduce_sum(t * m) / tf.reduce_sum(m)

        gold_diff_l2 = weighted_avg(gold_diff_l2, gold_mask)
        gold_diff_l1 = weighted_avg(gold_diff_l1, gold_mask)

        gold_diff_l2_placeholder = tf.placeholder(tf.float32, [None])
        gold_diff_l1_placeholder = tf.placeholder(tf.float32, [None])

    # Maps summary name -> tensor evaluated once per dataset batch.
    summary_name_to_batch_tensor = {}

    # Summarize quantized step embeddings
    if cfg.stp_emb_vq:
        summary_name_to_batch_tensor["codebook_perplexity"] = model_dict[
            "stp_emb_vq_codebook_ppl"]
        summary_name_to_batch_tensor["loss_vqvae"] = model_dict[
            "stp_emb_vq_loss"]

    # Summarize integer-quantized step embeddings
    if cfg.stp_emb_iq:
        summary_name_to_batch_tensor["discrete_perplexity"] = model_dict[
            "stp_emb_iq_discrete_ppl"]
        summary_name_to_batch_tensor["iq_valid_p"] = model_dict[
            "stp_emb_iq_valid_p"]
        summary_name_to_batch_tensor["loss_iq_range"] = model_dict[
            "stp_emb_iq_range_penalty"]
        summary_name_to_batch_tensor["loss_iq_contour"] = model_dict[
            "stp_emb_iq_contour_penalty"]
        summary_name_to_batch_tensor["loss_iq_deviate"] = model_dict[
            "stp_emb_iq_deviate_penalty"]

    if cfg.stp_emb_vq or cfg.stp_emb_iq:
        summary_name_to_batch_tensor["contour_violation"] = model_dict[
            "contour_violation"]
        summary_name_to_batch_tensor["deviate_violation"] = model_dict[
            "deviate_violation"]

    # Summarize VAE sequence embeddings
    if cfg.seq_emb_vae:
        summary_name_to_batch_tensor["loss_kl"] = model_dict["seq_emb_vae_kl"]

    # Reconstruction loss
    summary_name_to_batch_tensor["loss_recons"] = model_dict["dec_recons_loss"]
    summary_name_to_batch_tensor["ppl_recons"] = tf.exp(
        model_dict["dec_recons_loss"])
    if cfg.dec_pred_velocity:
        summary_name_to_batch_tensor["loss_recons_velocity"] = model_dict[
            "dec_recons_velocity_loss"]
        summary_name_to_batch_tensor["ppl_recons_velocity"] = tf.exp(
            model_dict["dec_recons_velocity_loss"])

    # Create dataset summaries: per-batch values are collected in Python,
    # fed back through a placeholder, and averaged inside the graph.
    summaries = []
    summary_name_to_placeholder = {}
    for name in summary_name_to_batch_tensor:
        placeholder = tf.placeholder(tf.float32, [None])
        summary_name_to_placeholder[name] = placeholder
        summaries.append(tf.summary.scalar(name, tf.reduce_mean(placeholder)))
    if eval_gold:
        summary_name_to_placeholder["gold_diff_l2"] = gold_diff_l2_placeholder
        summaries.append(
            tf.summary.scalar("gold_diff_l2",
                              tf.reduce_mean(gold_diff_l2_placeholder)))
        summary_name_to_placeholder["gold_diff_l1"] = gold_diff_l1_placeholder
        summaries.append(
            tf.summary.scalar("gold_diff_l1",
                              tf.reduce_mean(gold_diff_l1_placeholder)))

    summaries = tf.summary.merge(summaries)
    summary_writer = tf.summary.FileWriter(FLAGS.eval_dir)

    # Create saver
    step = tf.train.get_or_create_global_step()
    saver = tf.train.Saver(genie_vars + [step], max_to_keep=None)

    def _eval_all(sess):
        """Gathers all metrics for a ckpt."""
        # Local name shadows the merged summary op of the enclosing scope;
        # here it maps metric name -> list of per-batch values.
        summaries = collections.defaultdict(list)

        if eval_gold:
            for midi_notes, buttons, seq_varlen in gold.gold_iterator([-6,
                                                                       6]):
                # Only pitches and delta times are fed (delta times fixed
                # to 8); the "velocities" placeholder is left unfed.
                gold_diff_l1_seq, gold_diff_l2_seq = sess.run(
                    [gold_diff_l1, gold_diff_l2], {
                        gold_feat_dict["midi_pitches"]:
                        midi_notes,
                        gold_feat_dict["delta_times_int"]:
                        np.ones_like(midi_notes) * 8,
                        gold_seq_varlens: [seq_varlen],
                        gold_buttons:
                        buttons
                    })
                summaries["gold_diff_l1"].append(gold_diff_l1_seq)
                summaries["gold_diff_l2"].append(gold_diff_l2_seq)

        # Consume the (non-repeating) dataset until it is exhausted.
        while True:
            try:
                batches = sess.run(summary_name_to_batch_tensor)
            except tf.errors.OutOfRangeError:
                break
            for name, scalar in batches.items():
                summaries[name].append(scalar)

        return summaries

    # Eval
    if FLAGS.ckpt_fp is None:
        # Continuous mode: poll once per second and evaluate whenever the
        # latest checkpoint path changes. This loop never terminates.
        ckpt_fp = None
        while True:
            latest_ckpt_fp = tf.train.latest_checkpoint(FLAGS.train_dir)

            if latest_ckpt_fp != ckpt_fp:
                print("Eval: {}".format(latest_ckpt_fp))

                with tf.Session() as sess:
                    sess.run(tf.local_variables_initializer())
                    saver.restore(sess, latest_ckpt_fp)

                    ckpt_summaries = _eval_all(sess)
                    ckpt_summaries, ckpt_step = sess.run(
                        [summaries, step],
                        feed_dict={
                            summary_name_to_placeholder[n]: v
                            for n, v in ckpt_summaries.items()
                        })
                    summary_writer.add_summary(ckpt_summaries, ckpt_step)

                    saver.save(sess,
                               os.path.join(FLAGS.eval_dir, "ckpt"),
                               global_step=ckpt_step)

                print("Done")
                ckpt_fp = latest_ckpt_fp

            time.sleep(1)
    else:
        # Single-checkpoint mode: evaluate once and print metric means.
        with tf.Session() as sess:
            sess.run(tf.local_variables_initializer())
            saver.restore(sess, FLAGS.ckpt_fp)

            ckpt_summaries = _eval_all(sess)
            ckpt_step = sess.run(step)

            print("-" * 80)
            print("Ckpt: {}".format(FLAGS.ckpt_fp))
            print("Step: {}".format(ckpt_step))
            for n, l in sorted(list(ckpt_summaries.items()),
                               key=lambda x: x[0]):
                print("{}: {}".format(n, np.mean(l)))
def create_dual_ibp_approx(num_layers,
                           batch_size,
                           action_max,
                           W_T_list,
                           b_T_list,
                           action_tensor_center,
                           return_full_info=False):
    """Build the dual bound using interval bound propagation (IBP).

    Pre-activation bounds are propagated forward from the box
    `action_tensor_center +/- action_max`, then the dual variables are
    formed layer by layer going backward.

    Args:
        num_layers: number of layers K; controls both loop ranges.
        batch_size: first dimension of the initial `Nu_list` entries.
        action_max: half-width of the input box around the center.
        W_T_list: per-layer multiplicative weights, each used as the right
            operand of `tf.matmul(x, W)`.
        b_T_list: per-layer bias weights.
        action_tensor_center: center of the input box (assumed to be
            [batch, input_dim] - TODO confirm).
        return_full_info: if True, also return the intermediate lists.

    Returns:
        `-J_tilde`, or when `return_full_info` is set the tuple
        `(-J_tilde, l_list, u_list, D_list, Nu_list, lv_sum, gamma_list,
        psi, Nu_hat_1)`.
    """
    # List of bounds (l_i,u_i) for i = 1,...,K-1
    l_list = [
        action_tensor_center - action_max * tf.ones_like(action_tensor_center)
    ]
    u_list = [
        action_tensor_center + action_max * tf.ones_like(action_tensor_center)
    ]

    # List of transition matrices D_i for i = 1,...,K-1
    D_list = [tf.zeros_like(action_tensor_center)]

    # Indicators of spanning ReLu neurons for i = 1,...,K-1
    I_list = [tf.zeros_like(action_tensor_center)]

    # Indicators of active ReLu neurons for i = 1,...,K-1
    Ip_list = [tf.zeros_like(action_tensor_center)]

    # Final list of duals nu_i for i = 1,...,K-1
    # (placeholders only; entries are overwritten in the backward pass)
    Nu_list = [
        tf.zeros([batch_size, W_T_list[0].get_shape().as_list()[1], 1])
        for i in range(num_layers - 1)
    ]

    # Initialize Nu_K
    # The two negations cancel, so this is tf.eye(1) with a trailing axis,
    # i.e. a [1, 1, 1] tensor holding 1.0.
    Nu_K = -tf.expand_dims(-tf.eye(1), axis=-1)

    # Final list of b_i'*nu_{i+1} for i = 1,...,K-1
    gamma_list = [b_T_list[i] for i in range(num_layers - 1)]

    ################## get bounds for layers i = 2,...,K-1
    for i in range(2, num_layers):
        pre_l_i = l_list[-1]
        pre_u_i = u_list[-1]

        # Interval arithmetic on the affine map: propagate the box center
        # through W and the box radius through |W|.
        mu_i = 0.5 * (pre_l_i + pre_u_i)
        r_i = 0.5 * (pre_u_i - pre_l_i)

        l_i = tf.matmul(mu_i, W_T_list[i - 2]) - tf.matmul(
            r_i, tf.abs(W_T_list[i - 2])) + b_T_list[i - 2]

        u_i = tf.matmul(mu_i, W_T_list[i - 2]) + tf.matmul(
            r_i, tf.abs(W_T_list[i - 2])) + b_T_list[i - 2]

        l_list.append(l_i)
        u_list.append(u_i)

        # form Ip, I
        Ip_i, I_i = dual_method.get_I(l_list[-1], u_list[-1])
        I_list.append(I_i)
        Ip_list.append(Ip_i)

        # form D
        D_i = dual_method.get_D(l_list[-1], u_list[-1], Ip_i, I_i)
        D_list.append(D_i)

    ############## Go backward and form Nu_i

    # initialize Nu_{K-1} & gamma_{K-1}
    Nu_list[-1] = tf.einsum('ij,jk->ijk', D_list[-1], W_T_list[-1])
    # Repeat Nu_K along the first axis to match Nu_list[-1].
    Nu_K = tf.tile(Nu_K, [Nu_list[-1].get_shape().as_list()[0], 1, 1])
    Nu_list[-1] = tf.einsum('ijk,ikm->ijm', Nu_list[-1], Nu_K)

    gamma_list[-1] = tf.einsum('ij,ijm->im', gamma_list[-1], Nu_K)

    # initialize lv_sum
    lv_sum = tf.einsum('ij,ijm->im', l_list[-1] * I_list[-1],
                       tf.nn.relu(Nu_list[-1]))

    # update Nu_j for layers j = K-2,...,2
    # and gamma_j for layers j = K-2,...,2
    for j in range(num_layers - 2, 1, -1):
        Nu_hat_j = tf.einsum('jk,ikm->ijm', W_T_list[j - 1], Nu_list[j])

        gamma_list[j - 1] = tf.einsum('ij,ijm->im', b_T_list[j - 1],
                                      Nu_list[j])

        Nu_list[j - 1] = tf.einsum('ij,ijk->ijk', D_list[j - 1], Nu_hat_j)

        lv_sum = tf.add(
            lv_sum,
            tf.einsum('ij,ijm->im', l_list[j - 1] * I_list[j - 1],
                      tf.nn.relu(Nu_list[j - 1])))

    # update nu_hat_1 and gamma_1
    Nu_hat_1 = tf.einsum('jk,ikm->ijm', W_T_list[0], Nu_list[1])

    gamma_list[0] = tf.einsum('ij,ijm->im', b_T_list[0], Nu_list[1])

    # Compute J_tilde
    psi = tf.einsum('ij,ijm->im', action_tensor_center,
                    Nu_hat_1) + tf.add_n(gamma_list)

    # L1 norm of Nu_hat_1 taken along axis 1.
    Nu_hat_1_norm = tf.norm(Nu_hat_1, 1, axis=1, keepdims=False)

    J_tilde = -psi - action_max * Nu_hat_1_norm + lv_sum

    if return_full_info:
        return (-J_tilde, l_list, u_list, D_list, Nu_list, lv_sum,
                gamma_list, psi, Nu_hat_1)
    else:
        return -J_tilde
def build_train_graph(self,
                      inputs,
                      min_depth,
                      max_depth,
                      num_mpi_planes,
                      learning_rate=0.0002,
                      beta1=0.9,
                      vgg_model_file=None,
                      global_step=0):
    """Construct the training computation graph.

    The graph randomly picks a spatial down-scaling factor and a plane count,
    resizes the inputs, infers an MPI, renders it into the target view,
    builds a masked VGG loss on both the initial and the refined rendering,
    and registers TensorBoard summaries.

    Args:
      inputs: dictionary of tensors (see 'input_data' below) needed for
        training. Its "intrinsics" entry is overwritten in place with the
        rescaled intrinsics.
      min_depth: minimum depth for the PSV and MPI planes
      max_depth: maximum depth for the PSV and MPI planes
      num_mpi_planes: number of MPI planes to infer. NOTE(review): the value
        passed in is not used; it is replaced by the randomly selected plane
        count before first use.
      learning_rate: learning rate
      beta1: hyperparameter for Adam
      vgg_model_file: path to vgg weights (needed when vgg loss is used)
      global_step: current optimization step. NOTE(review): not referenced
        anywhere in this method.

    Returns:
      A train_op to be used for training: a one-element list holding the
      Adam `apply_gradients` op.
    """
    print("starting to build graph")
    with tf.name_scope("input_size_randomization"):
        # Each row is [spatial divisor, number of MPI planes]; one row is
        # chosen at random by shuffling and taking the first.
        dim_choices = tf.constant([[1, 16], [2, 32], [4, 32], [4, 64], [4, 128], [8, 32], [8, 64], [8, 128]], dtype=tf.int32)
        rand_dim = tf.random_shuffle(dim_choices)[0, :]
        # Height and width share the same divisor (both read column 0).
        height_div = rand_dim[0]
        width_div = rand_dim[0]
        # Overrides the `num_mpi_planes` argument.
        num_mpi_planes = rand_dim[1]
        tf.summary.scalar("num_mpi_planes", num_mpi_planes)

    with tf.name_scope("setup"):
        mpi_planes = self.inv_depths(min_depth, max_depth, num_mpi_planes)

    with tf.name_scope("input_data"):
        raw_tgt_image = inputs["tgt_image"]
        raw_ref_image = inputs["ref_image"]
        raw_src_images = inputs["src_images"]

        # Static height/width of the source images, divided by the sampled
        # divisor tensors to get the working resolution.
        _, img_height, img_width, _ = raw_src_images.get_shape().as_list()
        img_height = img_height // height_div
        img_width = img_width // width_div

        raw_tgt_image = tf.image.convert_image_dtype(raw_tgt_image, dtype=tf.float32)
        raw_ref_image = tf.image.convert_image_dtype(raw_ref_image, dtype=tf.float32)
        raw_src_images = tf.image.convert_image_dtype(raw_src_images, dtype=tf.float32)
        raw_tgt_image = tf.image.resize_area(raw_tgt_image, [img_height, img_width])
        raw_ref_image = tf.image.resize_area(raw_ref_image, [img_height, img_width])
        raw_src_images = tf.image.resize_area(raw_src_images, [img_height, img_width])

        tgt_pose = inputs["tgt_pose"]
        ref_pose = inputs["ref_pose"]
        src_poses = inputs["src_poses"]
        intrinsics = inputs["intrinsics"]

        # Scale intrinsics based on size randomization: row 0 is divided by
        # the width divisor, row 1 by the height divisor, row 2 is kept.
        intrinsics = tf.concat([intrinsics[:, 0:1, :] / tf.to_float(width_div), intrinsics[:, 1:2, :] / tf.to_float(height_div), intrinsics[:, 2:3, :]], axis=1)
        # Writes the scaled intrinsics back into the caller's dictionary.
        inputs["intrinsics"] = intrinsics

        _, num_source, _, _ = src_poses.get_shape().as_list()

    with tf.name_scope("inference"):
        print("setting up MPI inference")
        # From here on the plane count is read from the plane tensor itself.
        num_mpi_planes = tf.shape(mpi_planes)[0]
        pred = self.infer_mpi(raw_src_images, raw_ref_image, ref_pose, src_poses, intrinsics, num_mpi_planes, mpi_planes)
        rgba_layers = pred["rgba_layers"]
        rgba_layers_refine = pred["rgba_layers_refine"]
        stuff_behind = pred["stuff_behind"]
        refine_input_mpi = pred["refine_input_mpi"]
        psv = pred["psv"]

    with tf.name_scope("synthesis"):
        print("setting up rendering")
        # Pose of the target view relative to the reference view.
        rel_pose = tf.matmul(tgt_pose, tf.matrix_inverse(ref_pose))
        output_image, output_layers = self.mpi_render_view(rgba_layers, rel_pose, mpi_planes, intrinsics)
        output_alpha = output_layers[Ellipsis, -1]
        output_image_refine, _ = self.mpi_render_view(rgba_layers_refine, rel_pose, mpi_planes, intrinsics)

    with tf.name_scope("loss"):
        print("computing losses")
        # Mask loss for pixels outside reference frustum: a pixel gets mask 0
        # when at least one rendered layer sums to exactly zero over its
        # channels, and mask 1 otherwise.
        loss_mask = tf.where(tf.equal(tf.reduce_min(tf.abs(tf.reduce_sum(output_layers, axis=-1)), axis=3, keep_dims=True), 0.0), tf.zeros_like(output_alpha[:, :, :, 0:1]), tf.ones_like(output_alpha[:, :, :, 0:1]))
        loss_mask = tf.stop_gradient(loss_mask)
        tf.summary.image("loss_mask", loss_mask)

        # Helper functions for loss
        def compute_error(real, fake, mask):
            # Masked mean absolute difference.
            return tf.reduce_mean(mask * tf.abs(fake - real))

        # Normalized VGG loss (from
        # https://github.com/CQFIO/PhotographicImageSynthesis)
        # Average-pools the mask by `ds` so it matches deeper feature maps.
        downsample = lambda tensor, ds: tf.nn.avg_pool(tensor, [1, ds, ds, 1], [1, ds, ds, 1], "SAME")

        def vgg_loss(raw_tgt_image, output_image, loss_mask):
            """Compute VGG loss."""
            vgg_real = build_vgg19(raw_tgt_image * 255.0, vgg_model_file)
            # NOTE(review): this rescaling presumes output_image lies in
            # [-1, 1] while the target lies in [0, 1] -- TODO confirm.
            rescaled_output_image = (output_image + 1.)/2. * 255.0
            vgg_fake = build_vgg19(rescaled_output_image, vgg_model_file, reuse=True)
            # Per-layer masked errors, each divided by a fixed normaliser.
            p0 = compute_error(vgg_real["input"], vgg_fake["input"], loss_mask)
            p1 = compute_error(vgg_real["conv1_2"], vgg_fake["conv1_2"], loss_mask)/2.6
            p2 = compute_error(vgg_real["conv2_2"], vgg_fake["conv2_2"], downsample(loss_mask, 2))/4.8
            p3 = compute_error(vgg_real["conv3_2"], vgg_fake["conv3_2"], downsample(loss_mask, 4))/3.7
            p4 = compute_error(vgg_real["conv4_2"], vgg_fake["conv4_2"], downsample(loss_mask, 8))/5.6
            p5 = compute_error(vgg_real["conv5_2"], vgg_fake["conv5_2"], downsample(loss_mask, 16))*10/1.5
            total_loss = p0+p1+p2+p3+p4+p5
            return total_loss, vgg_real, vgg_fake

        # Total loss = VGG loss of the initial rendering + VGG loss of the
        # refined rendering.
        vgg_loss_initial, _, _ = vgg_loss(raw_tgt_image, output_image, loss_mask)
        tf.summary.scalar("vgg_loss_initial", vgg_loss_initial)
        total_loss = vgg_loss_initial

        vgg_loss_refine, _, _ = vgg_loss(raw_tgt_image, output_image_refine, loss_mask)
        tf.summary.scalar("vgg_loss_refine", vgg_loss_refine)
        total_loss += vgg_loss_refine

    with tf.name_scope("train_op"):
        print("setting up train op")
        train_vars = [var for var in tf.trainable_variables()]
        optim = tf.train.AdamOptimizer(learning_rate, beta1)
        grads_and_vars = optim.compute_gradients(total_loss, var_list=train_vars)
        # The op is wrapped in a list; callers receive that list.
        train_op = [optim.apply_gradients(grads_and_vars)]

    # Summaries
    tf.summary.scalar("total_loss", total_loss)
    # Source images: consecutive groups of 3 channels, one group per source.
    for i in range(num_source):
        src_image = raw_src_images[:, :, :, i*3:(i+1)*3]
        tf.summary.image("src_image_%d" % i, src_image)
    # Output image
    tf.summary.image("output_image", self.deprocess_image(output_image))
    # Refined output image
    tf.summary.image("output_image_refine", self.deprocess_image(output_image_refine))
    # Target image
    tf.summary.image("tgt_image", raw_tgt_image)
    # Ref image
    tf.summary.image("ref_image", raw_ref_image)
    # Predicted color and alpha layers, and PSV
    num_summ = 16  # Number of plane summaries to show in tensorboard
    for i in range(num_summ):
        # Evenly spaced plane index across the (dynamic) plane count.
        ind = tf.to_int32(i * num_mpi_planes/num_summ)
        rgb = rgba_layers[:, :, :, ind, :3]
        alpha = rgba_layers[:, :, :, ind, -1:]
        ref_plane = psv[:, :, :, ind, 3:6]
        source_plane = psv[:, :, :, ind, :3]
        output_rgb = output_layers[:, :, :, ind, :3]
        tf.summary.image("rgb_layer_%d" % i, self.deprocess_image(rgb))
        tf.summary.image("alpha_layer_%d" % i, alpha)
        tf.summary.image("rgba_layer_%d" % i, self.deprocess_image(rgb * alpha))
        tf.summary.image("psv_avg_%d" % i, (self.deprocess_image(0.5*ref_plane + 0.5*source_plane)))
        tf.summary.image("output_rgb_%d" % i, self.deprocess_image(output_rgb))
        tf.summary.image("psv_ref_%d" % i, self.deprocess_image(ref_plane))
        tf.summary.image("psv_source_%d" % i, self.deprocess_image(source_plane))
    # Cumulative rendered images and refined MPI
    for i in range(num_summ):
        ind = tf.to_int32(i * num_mpi_planes/num_summ)
        rgb = rgba_layers_refine[:, :, :, ind, :3]
        alpha = rgba_layers_refine[:, :, :, ind, 3:]
        render = stuff_behind[:, :, :, ind, :3]
        input_colors = refine_input_mpi[:, :, :, ind, :3]
        tf.summary.image("rgb_layer_refine_%d" % i, self.deprocess_image(rgb))
        tf.summary.image("alpha_layer_refine_%d" % i, alpha)
        tf.summary.image("rgba_layer_refine_%d" % i, self.deprocess_image(rgb * alpha))
        tf.summary.image("cumulative_render_%d" % i, self.deprocess_image(render))
        tf.summary.image("input_colors_refine_%d" % i, self.deprocess_image(input_colors))

    return train_op
def mask(target, mask):
    """Combine `target` with `mask` so only the masked part carries gradient.

    The forward value is `abs(mask - 1) * target + mask * target`. The first
    term is wrapped in `tf.stop_gradient`, so gradients flow only through the
    `mask * target` term.

    Args:
      target: tensor to be masked.
      mask: tensor multiplied element-wise with `target`; presumably a 0/1
        mask, in which case the forward value equals `target` -- TODO confirm.

    Returns:
      The sum of the gradient-blocked complement term and the masked term.
    """
    frozen_part = tf.stop_gradient(tf.abs(mask - 1) * target)
    live_part = mask * target
    return frozen_part + live_part
def _compute_inner_update_scinol(self, var, grad, state):
    """Build the SCINOL-style inner update for one variable.

    Reads the per-variable slots from `state`, clips and truncates `grad`,
    updates the reward, gradient sums and maximum-gradient slots, and writes
    a new betting fraction.

    Args:
      var: the variable whose slots are read and updated.
      grad: gradient fed to the inner update.
      state: object providing `get_hyper` and `get_slot` for `var`.

    Returns:
      A pair `(clipped_betting_fraction, update_op)`: the updated betting
      fraction clipped to `[-betting_domain, betting_domain]`, and a
      `tf.group` of all slot updates.
    """
    pending_updates = []

    domain = tf.cast(state.get_hyper(BETTING_DOMAIN), var.dtype.base_dtype)

    reward_slot = state.get_slot(var, INNER_REWARD)
    bet_slot = state.get_slot(var, OUTER_BETTING_FRACTION)
    grad_sq_sum_slot = state.get_slot(var, INNER_SUM_GRAD_SQUARED)
    grad_sum_slot = state.get_slot(var, INNER_SUM_GRAD)
    max_grad_slot = state.get_slot(var, INNER_MAXIMUM_GRADIENT)

    # Clip the gradient against the *previous* maximum-gradient value. Per
    # the original note this costs at most an additive constant in the
    # regret, because the betting fraction lives in a bounded domain.
    bounded_grad = tf.clip_by_value(grad, -max_grad_slot, max_grad_slot)

    # The maximum may only be refreshed after the clip has read the old one.
    with tf.control_dependencies([bounded_grad]):
        max_grad_new = self._assign(
            max_grad_slot, tf.maximum(max_grad_slot, tf.abs(grad)))
    pending_updates.append(max_grad_new)

    previous_bet_in_domain = tf.clip_by_value(bet_slot, -domain, domain)

    # Respect truncation to [-domain, domain]: the gradient is kept only
    # where its product with the clipping overshoot is non-negative, and
    # zeroed elsewhere.
    overshoot = bet_slot - previous_bet_in_domain
    gradient_is_kept = tf.greater_equal(bounded_grad * overshoot, 0.0)
    effective_grad = tf.where(gradient_is_kept, bounded_grad,
                              tf.zeros(tf.shape(bounded_grad)))

    reward_new = self._assign_add(reward_slot, -bet_slot * effective_grad)
    pending_updates.append(reward_new)

    grad_sq_sum_new = self._assign_add(grad_sq_sum_slot,
                                       tf.square(effective_grad))
    pending_updates.append(grad_sq_sum_new)

    grad_sum_new = self._assign_add(grad_sum_slot, effective_grad)
    pending_updates.append(grad_sum_new)

    # The second argument of this minimum, eta / max-gradient, is a hack that
    # keeps the betting fraction from being too large at first.
    step_scale = tf.minimum(
        tf.rsqrt(grad_sq_sum_new + tf.square(max_grad_new)),
        self.eta / max_grad_new)
    theta = -grad_sum_new * step_scale

    # With `rescale_inner` set, epsilon_v is rescaled by the maximum inner
    # gradient (also a hack, per the original note).
    epsilon_scale = max_grad_new if self.rescale_inner else 1.0

    inner_fraction = (
        tf.sign(theta) * tf.minimum(tf.abs(theta), 1.0) * step_scale / 2.0)
    bet_new = self._assign(
        bet_slot,
        inner_fraction * (reward_new + epsilon_scale * self.epsilon_v))
    pending_updates.append(bet_new)

    bet_in_domain = tf.clip_by_value(bet_new, -domain, domain)

    if self.output_summaries:
        # All statistics are built first, then emitted in the same order.
        scalar_summaries = [
            ("/mean_unclipped_bet/", tf.reduce_mean(tf.abs(bet_new))),
            ("/max_unclipped_bet/", tf.reduce_max(tf.abs(bet_new))),
            ("/mean_clipped_bet/", tf.reduce_mean(tf.abs(bet_in_domain))),
            ("/max_clipped_bet/", tf.reduce_max(tf.abs(bet_in_domain))),
            ("/max_abs_inner_grad/", tf.reduce_max(tf.abs(grad))),
            ("/max_abs_truncated_inner_grad/",
             tf.reduce_max(tf.abs(effective_grad))),
        ]
        for tag, value in scalar_summaries:
            tf.summary.scalar(self._name + tag + var.name, value)

    return bet_in_domain, tf.group(*pending_updates)
def _build_train_op(self):
    """Builds the quantile-Huber training op from replay data.

    Returns:
      A pair `(train_op, mean_loss)`: the op returned by
      `self.optimizer.minimize` for one training step, and the scalar mean
      of the per-sample loss.
    """
    num_tau = self.num_tau_samples
    num_tau_prime = self.num_tau_prime_samples
    kappa = self.kappa
    batch_size = tf.shape(self._replay.rewards)[0]

    # Targets are tiled as num_tau_prime x batch_size x 1; reshape to that
    # layout and then swap the first two axes to get
    # batch_size x num_tau_prime x 1.
    target_values = tf.stop_gradient(self._build_target_quantile_values_op())
    target_values = tf.transpose(
        tf.reshape(target_values, [num_tau_prime, batch_size, 1]), [1, 0, 2])

    # Build (row, action) index pairs for tf.gather_nd: one row per
    # (tau sample, batch element), shape (num_tau * batch_size) x 2.
    row_ids = tf.range(num_tau * batch_size)[:, None]
    tiled_actions = tf.tile(self._replay.actions[:, None], [num_tau, 1])
    gather_index = tf.concat([row_ids, tiled_actions], axis=1)

    # Quantile values of the chosen actions, brought to
    # batch_size x num_tau x 1 in the same way as the targets.
    chosen_values = tf.gather_nd(self._replay_net_quantile_values,
                                 gather_index)
    chosen_values = tf.transpose(
        tf.reshape(chosen_values, [num_tau, batch_size, 1]), [1, 0, 2])

    # batch_size x num_tau_prime x num_tau x 1.
    bellman_errors = (
        target_values[:, :, None, :] - chosen_values[:, None, :, :])

    # Huber loss: quadratic where |error| <= kappa, linear where it is
    # larger.
    abs_errors = tf.abs(bellman_errors)
    quadratic_part = (
        tf.cast(abs_errors <= kappa, tf.float32) * 0.5 * bellman_errors**2)
    linear_part = (
        tf.cast(abs_errors > kappa, tf.float32) * kappa *
        (abs_errors - 0.5 * kappa))
    huber_loss = quadratic_part + linear_part

    # Sampled quantiles, reshaped to batch_size x num_tau x 1 and tiled along
    # a new num_tau_prime axis so they line up with the Bellman errors.
    taus = tf.transpose(
        tf.reshape(self._replay_net_quantiles, [num_tau, batch_size, 1]),
        [1, 0, 2])
    taus = tf.cast(
        tf.tile(taus[:, None, :, :], [1, num_tau_prime, 1, 1]), tf.float32)

    # Quantile-weighted Huber loss; the indicator carries no gradient.
    error_is_negative = tf.stop_gradient(
        tf.cast(bellman_errors < 0, tf.float32))
    quantile_huber_loss = (
        tf.abs(taus - error_is_negative) * huber_loss) / kappa

    # Sum over the num_tau axis, then average over the num_tau_prime axis,
    # leaving batch_size x 1.
    loss = tf.reduce_mean(tf.reduce_sum(quantile_huber_loss, axis=2), axis=1)

    update_priorities_op = tf.no_op()
    with tf.control_dependencies([update_priorities_op]):
        mean_loss = tf.reduce_mean(loss)
        if self.summary_writer is not None:
            with tf.variable_scope('Losses'):
                tf.summary.scalar('QuantileLoss', mean_loss)
        return self.optimizer.minimize(mean_loss), mean_loss