def __init__(self, layers_info, output_activation=None, hidden_activations="relu", dropout= 0.0, initialiser="default", batch_norm=False, y_range=(), random_seed=0, input_dim=None): Model.__init__(self) self.valid_cnn_hidden_layer_types = {'conv', 'maxpool', 'avgpool', 'linear'} self.valid_layer_types_with_no_parameters = (MaxPool2D, AveragePooling2D) Base_Network.__init__(self, layers_info, output_activation, hidden_activations, dropout, initialiser, batch_norm, y_range, random_seed, input_dim)
def __init__(self, layers_info: list, output_activation=None, input_dim=None, hidden_activations="relu",
             dropout: float = 0.0, initialiser: str = "default", batch_norm: bool = False,
             columns_of_data_to_be_embedded: list = [], embedding_dimensions: list = [],
             y_range: tuple = (), random_seed=0, print_model_summary: bool = False):
    Model.__init__(self)
    self.embedding_to_occur = len(columns_of_data_to_be_embedded) > 0
    self.columns_of_data_to_be_embedded = columns_of_data_to_be_embedded
    self.embedding_dimensions = embedding_dimensions
    self.embedding_layers = self.create_embedding_layers()
    TensorFlow_Base_Network.__init__(self, input_dim, layers_info, output_activation, hidden_activations,
                                     dropout, initialiser, batch_norm, y_range, random_seed,
                                     print_model_summary)
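# A usage sketch for the constructor above, assuming it belongs to an nn_builder-style NN class
# built on TensorFlow_Base_Network. The layers_info format (one width per hidden layer plus the
# output layer) and the embedding_dimensions format ([num_categories, embedding_size] per embedded
# column) are assumptions inferred from the parameter names.
net = NN(input_dim=5,
         layers_info=[64, 64, 1],
         hidden_activations="relu",
         output_activation=None,
         columns_of_data_to_be_embedded=[0],   # column 0 holds categorical ids
         embedding_dimensions=[[20, 4]],       # 20 categories mapped to a 4-dim embedding
         dropout=0.1,
         random_seed=42)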
def __init__(self, num_head, mu_shared_nodes, mu_branched_nodes=None, sigma_shared_nodes=None,
             sigma_branched_nodes=None, learning_rate=0.001, lr_decay_step=None, lr_decay_rate=None,
             single_head_multi_out=False):
    Model.__init__(self)
    self.gaussian_layer = BranchedMultiHeadGaussianMLP(num_head, mu_shared_nodes,
                                                       mu_branched_nodes=mu_branched_nodes,
                                                       sigma_shared_nodes=sigma_shared_nodes,
                                                       sigma_branched_nodes=sigma_branched_nodes,
                                                       single_head_multi_out=single_head_multi_out)
    self.learning_rate = tf.Variable(learning_rate, trainable=False)
    self.lr_decay_step = lr_decay_step
    self.lr_decay_rate = lr_decay_rate
    self.optimizer = tf.keras.optimizers.RMSprop(self.learning_rate)
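# A hypothetical instantiation of the wrapper above. BranchedMultiHeadGaussianMLP is defined
# elsewhere in the codebase; the class name GaussianMLPModel and the node-list format below
# (hidden widths for the shared trunk and per-head branches) are assumptions made only to
# illustrate the constructor's parameters.
model = GaussianMLPModel(num_head=3,
                         mu_shared_nodes=[128, 128],
                         mu_branched_nodes=[64],
                         sigma_shared_nodes=[128],
                         sigma_branched_nodes=[64],
                         learning_rate=1e-3,
                         lr_decay_step=10000,
                         lr_decay_rate=0.96)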
def __init__(self, layers_info, output_activation=None, hidden_activations="relu", dropout=0.0, initialiser="default", batch_norm=False, columns_of_data_to_be_embedded=[], embedding_dimensions=[], y_range=(), random_seed=0, input_dim=None): Model.__init__(self) self.embedding_to_occur = len(columns_of_data_to_be_embedded) > 0 self.columns_of_data_to_be_embedded = columns_of_data_to_be_embedded self.embedding_dimensions = embedding_dimensions self.embedding_layers = self.create_embedding_layers() Base_Network.__init__(self, layers_info, output_activation, hidden_activations, dropout, initialiser, batch_norm, y_range, random_seed, input_dim)
def __init__(self, layers_info, output_activation=None, hidden_activations="relu", dropout=0.0, initialiser="default", batch_norm=False, columns_of_data_to_be_embedded=[], embedding_dimensions=[], y_range=(), return_final_seq_only=True, random_seed=0, input_dim=None): Model.__init__(self) self.embedding_to_occur = len(columns_of_data_to_be_embedded) > 0 self.columns_of_data_to_be_embedded = columns_of_data_to_be_embedded self.embedding_dimensions = embedding_dimensions self.embedding_layers = self.create_embedding_layers() self.return_final_seq_only = return_final_seq_only self.valid_RNN_hidden_layer_types = {"linear", "gru", "lstm"} Base_Network.__init__(self, layers_info, output_activation, hidden_activations, dropout, initialiser, batch_norm, y_range, random_seed, input_dim)
def __init__(self, x_dim, y_dim, f_i_embedder_fn, r_ik_embedder_fn, predictor_fn, updater_fn,
             xy_embedder_fn, x_embedder_fn, L, num_encoder_decoder_stack, i_dim_reduction=0,
             o_dim_reduction=0, dot_product_attention_scale=False, max_grad_norm=0, entropy_lambda=0,
             learning_rate=1e-3, alter_psi=False):
    Model.__init__(self)
    self.L = L
    self.alter_psi = alter_psi
    self.f_i_embedder = f_i_embedder_fn()
    self.r_ik_embedder = r_ik_embedder_fn()
    self.predictor = predictor_fn(y_dim=y_dim)
    self.xy_embedder = xy_embedder_fn()
    self.xq_embedder = x_embedder_fn()
    self.encoder = [M.attention.Encoder() for _ in range(num_encoder_decoder_stack)]
    self.decoder = [M.attention.Decoder() for _ in range(num_encoder_decoder_stack)]
    self.encoder_decoder = [(e, d) for e, d in zip(self.encoder, self.decoder)]
    self.i_transform = Dense(i_dim_reduction, use_bias=False) if i_dim_reduction > 0 else None
    self.o_transform = Dense(o_dim_reduction, use_bias=False) if o_dim_reduction > 0 else None
    self.dot_product_attention_scale = dot_product_attention_scale
    self.updater = updater_fn()
    self.optimizer = tf.keras.optimizers.Adam(learning_rate)
    self.train_loss = tf.keras.metrics.Mean(name='train_loss')
    self.grad_norm = tf.keras.metrics.Mean(name='grad_norm')
    self.entropy = tf.keras.metrics.Mean(name='entropy')
    self.reports = {
        'loss': self.train_loss,
        'grad_norm': self.grad_norm,
        'entropy': self.entropy,
    }

    @tf.function(input_signature=(
        tf.TensorSpec(shape=[None, None, x_dim], dtype=tf.float32),
        tf.TensorSpec(shape=[None, None, y_dim], dtype=tf.float32),
        tf.TensorSpec(shape=[None, None, x_dim], dtype=tf.float32),
        tf.TensorSpec(shape=[None, None, y_dim], dtype=tf.float32)))
    def update(x, y, q, a):
        with tf.GradientTape() as tape:
            if isinstance(self.predictor, M.ProbabilisticPrediction):
                # Probabilistic head: minimise the negative log-likelihood of the targets,
                # plus an optional entropy term weighted by entropy_lambda.
                a_hat, dist = self((x, y, q), training=True)
                loss = tf.reduce_mean(-1. * dist.log_prob(a))
                entropy = tf.reduce_mean(dist.mixture_distribution.entropy())
                self.entropy(entropy)
                if entropy_lambda > 0:
                    loss += entropy_lambda * entropy
            else:
                # Deterministic head: plain squared-error regression loss.
                a_hat, dist = self((x, y, q), training=True)
                loss = tf.reduce_mean(tf.reduce_sum(0.5 * (a - a_hat) ** 2, axis=-1))
                if dist is not None:
                    entropy = tf.reduce_mean(dist.mixture_distribution.entropy())
                    self.entropy(entropy)
        gradients = tape.gradient(loss, self.trainable_variables)
        # tf.clip_by_global_norm also returns the pre-clipping global norm, which is logged
        # regardless of whether clipping is actually applied.
        gradients_clipped, grad_norm = tf.clip_by_global_norm(gradients, max_grad_norm)
        self.grad_norm(grad_norm)
        if max_grad_norm > 0:
            gradients = gradients_clipped
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        self.train_loss(loss)

    self.update = update
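# A sketch of how the compiled update step above might be driven, based only on the
# input_signature declared in the tf.function: x/q are [batch, set_size, x_dim] float32 tensors
# and y/a are [batch, set_size, y_dim]. The concrete dimensions (x_dim=3, y_dim=1), batch shapes,
# and the already-constructed `model` instance are placeholders, not part of the original snippet.
import numpy as np
import tensorflow as tf

x = tf.constant(np.random.randn(8, 10, 3), dtype=tf.float32)   # context inputs
y = tf.constant(np.random.randn(8, 10, 1), dtype=tf.float32)   # context targets
q = tf.constant(np.random.randn(8, 5, 3), dtype=tf.float32)    # query inputs
a = tf.constant(np.random.randn(8, 5, 1), dtype=tf.float32)    # query targets

model.update(x, y, q, a)   # one gradient step on the batch
print({name: metric.result().numpy() for name, metric in model.reports.items()})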