def initialize_neural_states(self, n):
    '''Neural states'''
    inp = Dense(self.n_hidden,
                activation=tf.nn.relu,  # activation = tf.nn.tanh
                name="bb_hidden",
                input_shape=(n,))
    act_layer = Dense(4 + self.n_latent_species,
                      activation=tf.nn.sigmoid,
                      name="bb_act")
    deg_layer = Dense(4 + self.n_latent_species,
                      activation=tf.nn.sigmoid,
                      name="bb_deg")
    act = Sequential([inp, act_layer])
    deg = Sequential([inp, deg_layer])
    for layer in [inp, act_layer, deg_layer]:
        weights, bias = layer.weights
        variable_summaries(weights, layer.name + "_kernel", False)
        variable_summaries(bias, layer.name + "_bias", False)
    return act, deg
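# Illustrative sketch (assumption: toy sizes, standalone names; not the model's real configuration).
# It mirrors the pattern above: one shared ReLU hidden layer feeds two sigmoid heads, so activation
# and degradation rates are predicted from a common hidden representation of the state.
def _example_act_deg_networks(n_inputs=8, n_hidden=16, n_outputs=6):
    import tensorflow as tf
    from tensorflow.keras.layers import Dense
    from tensorflow.keras.models import Sequential
    shared = Dense(n_hidden, activation=tf.nn.relu, input_shape=(n_inputs,))
    act = Sequential([shared, Dense(n_outputs, activation=tf.nn.sigmoid)])
    deg = Sequential([shared, Dense(n_outputs, activation=tf.nn.sigmoid)])
    states = tf.random.normal([4, n_inputs])  # toy batch of 4 states
    return act(states), deg(states)           # each output has shape (4, n_outputs), values in (0, 1)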
def call(self, inputs):
    X = inputs[0]  # Node features (N x F)
    A = inputs[1]  # Adjacency matrix (N x N)

    # Parameters
    N = K.shape(X)[0]  # Number of nodes in the graph

    with tf.name_scope(self.gcn_layer_name):
        outputs = []
        for head in range(self.attn_heads):
            with tf.name_scope(f'kernel_{head}'):
                kernel = self.kernels[head]  # W in the paper (F x F')
                variable_summaries(kernel)
            with tf.name_scope(f'attention_{head}'):
                # Compute inputs to attention network
                attention_kernel = self.attn_kernels[head]  # Attention kernel a in the paper (2F' x 1)
                variable_summaries(attention_kernel)

            linear_transf_X = K.dot(X, kernel)  # (N x F')

            # Compute feature combinations
            # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_j]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j]
            attn_for_self = K.dot(linear_transf_X, attention_kernel[0])    # (N x 1), [a_1]^T [Wh_i]
            attn_for_neighs = K.dot(linear_transf_X, attention_kernel[1])  # (N x 1), [a_2]^T [Wh_j]

            # Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]]
            dense = attn_for_self + K.transpose(attn_for_neighs)  # (N x N) via broadcasting

            # Add nonlinearity
            dense = LeakyReLU(alpha=0.2)(dense)

            # Mask values before activation (Vaswani et al., 2017)
            mask = K.exp(A * -10e9) * -10e9
            masked = dense + mask

            # Feed masked values to softmax
            softmax = K.softmax(masked)                     # (N x N), attention coefficients
            dropout = Dropout(self.attn_dropout)(softmax)   # (N x N)

            # Linear combination with neighbors' features
            node_features = K.dot(dropout, linear_transf_X)  # (N x F')

            if self.attn_heads_reduction == 'concat' and self.activation is not None:
                # In case of 'concat', we compute the activation here (Eq. 5)
                node_features = self.activation(node_features)

            # Add output of attention head to final output
            outputs.append(node_features)

        with tf.name_scope("activations"):
            # Reduce the attention head outputs according to the reduction method
            if self.attn_heads_reduction == 'concat':
                output = K.concatenate(outputs)  # (N x KF')
            else:
                output = K.mean(K.stack(outputs), axis=0)  # (N x F')
            if self.activation is not None:
                # In case of 'average', we compute the activation here (Eq. 6)
                output = self.activation(output)
            tf.summary.histogram('activations', output)
    return output
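# Illustrative sketch (assumption: a tiny 3-node binary adjacency, independent of the layer above).
# It isolates the masking trick used before the softmax: where A_ij = 1, exp(-10e9) underflows to 0
# and the mask is ~0; where A_ij = 0, the mask is exp(0) * -10e9 = -10e9, so non-neighbors get
# (almost) zero attention weight after the softmax.
def _example_attention_mask():
    import numpy as np
    import tensorflow.keras.backend as K
    A = K.constant(np.array([[1., 1., 0.],
                             [1., 1., 1.],
                             [0., 1., 1.]]))
    scores = K.constant(np.random.randn(3, 3).astype('float32'))  # raw attention logits
    mask = K.exp(A * -10e9) * -10e9        # ~0 where A = 1, -10e9 where A = 0
    attention = K.softmax(scores + mask)   # rows sum to 1, ~0 weight on non-edges
    return attention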
def call(self, inputs):
    X = inputs[0]  # Node features (N x F)
    A = inputs[1]  # k-Adjacency matrices (N x N x k)

    # Repeat a 2-D tensor along a new last axis, once per hop
    k_repeat = lambda x: K.repeat_elements(
        K.expand_dims(x, axis=-1), rep=self.num_hops, axis=2)

    with tf.name_scope(self.gcn_layer_name):
        outputs = []
        for head in range(self.attn_heads):
            with tf.name_scope(f'kernel_{head}'):
                kernel = self.kernels[head]  # W in the paper (F x F')
                variable_summaries(kernel)
            with tf.name_scope(f'attention_{head}'):
                attention_kernel = self.attn_kernels[head]  # Attention kernel a in the paper (2F' x 1)
                variable_summaries(attention_kernel)

            # Compute inputs to attention network
            linear_transf_X = K.dot(X, kernel)  # (N x F')

            # Compute feature combinations
            # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_j]] = [a_1k]^T [Wh_i] + [a_2k]^T [Wh_j]
            # TODO: normalize attention with softmax in K dim
            attn_for_self = K.dot(linear_transf_X, attention_kernel[0])    # (N x 1 x K), [a_1k]^T [Wh_i]
            attn_for_neighs = K.dot(linear_transf_X, attention_kernel[1])  # (N x 1 x K), [a_2k]^T [Wh_j]

            if self.attn_mode == "full":
                # Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]]
                # Comment: neat way to create a Toeplitz (diagonal-constant) matrix from two 1-D vectors
                # (v1, v2) with the following sum structure: [[v1:+v21], [v1:+v22], ..., [v1:+v2N]]
                dense = K.transpose(
                    K.transpose(K.expand_dims(attn_for_self, 1)) +
                    K.expand_dims(K.transpose(attn_for_neighs), 2))  # (N x N x K) via broadcasting

                if self.weight_mask:
                    # Masking with weights of path (giving structure additional meaning)
                    dense = dense * A

                # Add nonlinearity
                dense = LeakyReLU(alpha=0.2)(dense)

                # Mask values before activation (Vaswani et al., 2017)
                # TODO: try different comparisons, like zeroing unlikely paths.
                # Using mask values will probably push values very low. Some variations can be tested:
                # 1. comparison = K.less_equal(A, K.const(1e-15))
                # 2. K.max(dense * mask, axis=2)   # take the highest value of dense * mask
                # 3. dense[K.max(mask, axis=2)]    # take the value of the most informative scale
                # 4. Try max instead of mean in the 'softmax = ...' step
                mask = K.exp(A * -10e9) * -10e9
                masked = activations.softmax(dense + mask, axis=1)  # which axis to softmax over? probably both
                softmax = K.mean(masked, axis=2)  # a_{i,j} importance is decided by the mean over the hop axis
                softmax = softmax / K.sum(softmax, axis=-1, keepdims=True)
            elif self.attn_mode in ["layerwise", "gat"]:
                # Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]]
                dense = attn_for_self + K.transpose(attn_for_neighs)  # (N x N) via broadcasting

                # Add nonlinearity
                dense = LeakyReLU(alpha=0.2)(dense)

                if self.num_hops > 1:
                    dense = k_repeat(dense)  # inflate dense dimension by repeating per hop
                if self.weight_mask:
                    # Masking with weights of path (giving structure additional meaning)
                    dense = dense * A

                # Mask values before activation (Vaswani et al., 2017)
                mask = K.exp(A * -10e9) * -10e9
                if self.attn_mode == "layerwise" and self.num_hops > 1:
                    masked = tf.tensordot(dense + mask, self.resolution_kernel,
                                          axes=[2, 0])  # (N x N), attention coefficients
                elif self.num_hops == 1:
                    masked = dense + mask

                # Feed masked values to softmax
                softmax = K.softmax(masked)  # (N x N), attention coefficients

            dropout = Dropout(self.attn_dropout)(softmax)  # (N x N)

            # Linear combination with neighbors' features
            node_features = K.dot(dropout, linear_transf_X)  # (N x F')

            if self.attn_heads_reduction == 'concat' and self.activation is not None:
                # In case of 'concat', we compute the activation here (Eq. 5)
                node_features = self.activation(node_features)

            # Add output of attention head to final output
            outputs.append(node_features)

        with tf.name_scope("activations"):
            # Reduce the attention head outputs according to the reduction method
            if self.attn_heads_reduction == 'concat':
                output = K.concatenate(outputs)  # (N x KF')
            else:
                output = K.mean(K.stack(outputs), axis=0)  # (N x F')
            if self.activation is not None:
                # In case of 'average', we compute the activation here (Eq. 6)
                output = self.activation(output)
            tf.summary.histogram('activations', output)
    return output
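# Illustrative sketch (assumption: toy sizes and a hypothetical uniform resolution kernel; not the
# layer's actual weights). It shows how the 'layerwise' mode collapses per-hop scores of shape
# (N x N x k) into a single (N x N) attention map by a learned weighting over the hop axis.
def _example_layerwise_hop_reduction(num_nodes=4, num_hops=3):
    import tensorflow as tf
    scores = tf.random.normal([num_nodes, num_nodes, num_hops])      # masked per-hop logits
    resolution_kernel = tf.ones([num_hops]) / float(num_hops)        # hypothetical hop weights
    combined = tf.tensordot(scores, resolution_kernel, axes=[2, 0])  # (N x N)
    return tf.nn.softmax(combined, axis=-1)                          # attention coefficients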
def gen_reaction_equations(self, theta, treatments, dev_1hot, condition_on_device=True):
    n_iwae = tf.shape(theta.r)[1]

    # Tile treatments, one per IWAE sample
    treatments_transformed = tf.clip_by_value(tf.exp(treatments) - 1.0, 0.0, 1e6)
    c6a, c12a = tf.unstack(treatments_transformed, axis=1)
    c6 = tf.tile(tf.expand_dims(c6a, axis=1), [1, n_iwae])
    c12 = tf.tile(tf.expand_dims(c12a, axis=1), [1, n_iwae])

    # Need to clip these to avoid overflow
    r = tf.clip_by_value(theta.r, 0.0, 5.0)
    K = theta.K
    tlag = theta.tlag
    rc = theta.rc
    autoY = theta.autoY
    autoC = theta.autoC
    drfp = tf.clip_by_value(theta.drfp, 1e-12, 2.0)
    dyfp = tf.clip_by_value(theta.dyfp, 1e-12, 2.0)
    dcfp = tf.clip_by_value(theta.dcfp, 1e-12, 2.0)
    dR = tf.clip_by_value(theta.dR, 1e-12, 5.0)
    dS = tf.clip_by_value(theta.dS, 1e-12, 5.0)
    e76 = theta.e76
    e81 = theta.e81
    aCFP = theta.aCFP
    aYFP = theta.aYFP
    if self.use_aRFP:
        aRFP = theta.aRFP
    KGR_76 = theta.KGR_76
    KGS_76 = theta.KGS_76
    KGR_81 = theta.KGR_81
    KGS_81 = theta.KGS_81
    KR6 = theta.KR6
    KR12 = theta.KR12
    KS6 = theta.KS6
    KS12 = theta.KS12
    nR = tf.clip_by_value(theta.nR, 0.5, 3.0)
    nS = tf.clip_by_value(theta.nS, 0.5, 3.0)

    # Condition on device information by mapping param_cond = f(param, d; \phi), where d is the
    # one-hot representation of the device. Currently, f is a one-layer MLP with NO activation
    # function (i.e., offset and scale only).
    if condition_on_device:
        kinit = tf.keras.initializers.RandomNormal(mean=2.0, stddev=1.5)
        ones = tf.tile([[1.0]], tf.shape(theta.r))
        aR = self.device_conditioner(ones, 'aR', dev_1hot, kernel_initializer=kinit)
        aS = self.device_conditioner(ones, 'aS', dev_1hot, kernel_initializer=kinit)
        variable_summaries(aR, 'aR.conditioned')
        variable_summaries(aS, 'aS.conditioned')
    else:
        aR = theta.aR
        aS = theta.aS

    def reaction_equations(state, t):
        x, rfp, yfp, cfp, f510, f430, luxR, lasR = tf.unstack(state, axis=2)

        # Cells growing or not (not before lag-time)
        gr = r * tf.sigmoid(4.0 * (tf.cast(t, tf.float32) - tlag))

        # Specific growth and dilution
        g = (1.0 - x / K)
        gamma = gr * g

        # Promoter activity
        boundLuxR = luxR * luxR * ((KR6 * c6)**nR + (KR12 * c12)**nR) / (
            (1.0 + KR6 * c6 + KR12 * c12)**nR)
        boundLasR = lasR * lasR * ((KS6 * c6)**nS + (KS12 * c12)**nS) / (
            (1.0 + KS6 * c6 + KS12 * c12)**nS)
        P76 = (e76 + KGR_76 * boundLuxR + KGS_76 * boundLasR) / (
            1.0 + KGR_76 * boundLuxR + KGS_76 * boundLasR)
        P81 = (e81 + KGR_81 * boundLuxR + KGS_81 * boundLasR) / (
            1.0 + KGR_81 * boundLuxR + KGS_81 * boundLasR)

        # Check they are finite
        boundLuxR = tf.verify_tensor_all_finite(boundLuxR, "boundLuxR NOT finite")
        boundLasR = tf.verify_tensor_all_finite(boundLasR, "boundLasR NOT finite")

        # Right-hand sides
        d_x = gamma * x
        if self.use_aRFP is True:
            d_rfp = rc * aRFP - (gamma + drfp) * rfp
        else:
            d_rfp = rc - (gamma + drfp) * rfp
        d_yfp = rc * aYFP * P81 - (gamma + dyfp) * yfp
        d_cfp = rc * aCFP * P76 - (gamma + dcfp) * cfp
        d_f510 = rc * autoY - gamma * f510
        d_f430 = rc * autoC - gamma * f430
        d_luxR = rc * aR - (gamma + dR) * luxR
        d_lasR = rc * aS - (gamma + dS) * lasR

        X = tf.stack(
            [d_x, d_rfp, d_yfp, d_cfp, d_f510, d_f430, d_luxR, d_lasR],
            axis=2)
        return X

    return reaction_equations
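# Illustrative sketch (assumption: a plain forward-Euler loop with hypothetical shapes; the solver
# actually used with reaction_equations is defined elsewhere). gen_reaction_equations returns a
# closure d_state/dt = f(state, t), so any fixed-step integrator can roll the 8 species forward
# from an initial condition of shape (batch, n_iwae, 8).
def _example_euler_rollout(reaction_equations, init_state, t_max=20.0, n_steps=200):
    import tensorflow as tf
    dt = t_max / float(n_steps)
    state = init_state                   # shape (batch, n_iwae, 8)
    trajectory = [state]
    for step in range(n_steps):
        t = tf.constant(step * dt, dtype=tf.float32)
        state = state + dt * reaction_equations(state, t)  # forward-Euler update
        trajectory.append(state)
    return tf.stack(trajectory, axis=0)  # (n_steps + 1, batch, n_iwae, 8)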