Ejemplo n.º 1
0
def SCNN(vst_onlyTokens,
         dl_terms,
         dl_associations,
         vso,
         nbEpochs=150,
         batchSize=64,
         l_numberOfFilters=[4000],
         l_filterSizes=[1],
         phraseMaxSize=15):
    """Build and train the Shallow-CNN (S-CNN) term normalizer.

    One Conv1D/MaxPool/LeakyReLU branch is created per entry of
    l_filterSizes (with l_numberOfFilters[i] filters each), the branches
    are concatenated, and a dense layer projects onto the ontology space.

    Returns (trained model, vso, list of unknown tokens).
    NOTE(review): mutable list defaults kept for interface compatibility;
    they are only read, never mutated.
    """
    data, labels, l_unkownTokens, l_uncompleteExpressions = prepare2D_data(
        vst_onlyTokens, dl_terms, dl_associations, vso, phraseMaxSize)

    phraseInput = Input(shape=(phraseMaxSize, data.shape[2]))

    branches = list()
    for idx, width in enumerate(l_filterSizes):
        conv = layers.Conv1D(
            l_numberOfFilters[idx],
            width,
            strides=1,
            kernel_initializer=initializers.GlorotUniform())(phraseInput)
        # A width-w convolution over a length-L phrase leaves L - w + 1
        # positions; pooling over all of them yields one value per filter.
        pooled = layers.MaxPool1D(pool_size=phraseMaxSize - width + 1)(conv)
        branches.append(layers.LeakyReLU(alpha=0.3)(pooled))

    if len(l_filterSizes) > 1:
        # Concatenate the branches on the last (feature) dimension.
        merged = layers.Concatenate(axis=-1)(branches)
    else:
        merged = branches[0]

    fullmodel = models.Sequential()
    fullmodel.add(Model(inputs=phraseInput, outputs=merged))
    fullmodel.add(
        layers.Dense(labels.shape[2],
                     kernel_initializer=initializers.GlorotUniform()))

    fullmodel.summary()
    fullmodel.compile(
        optimizer=optimizers.Nadam(),
        loss=losses.LogCosh(),
        metrics=[metrics.CosineSimilarity(),
                 metrics.MeanSquaredError()])
    fullmodel.fit(data, labels, epochs=nbEpochs, batch_size=batchSize)

    return fullmodel, vso, l_unkownTokens
Ejemplo n.º 2
0
 def __initialize_weights_and_biases(self, xavier):
     """Fill self.weights and self.biases for consecutive layer pairs of self.shape.

     xavier -- when truthy, each weight matrix uses Glorot-uniform
     initialization; otherwise standard-normal samples. Biases are always
     standard-normal column vectors, one per non-input layer.
     """
     self.weights = []
     for fan_in, fan_out in zip(self.shape[:-1], self.shape[1:]):
         if xavier:
             w = initializers.GlorotUniform()(shape=(fan_out, fan_in)).numpy()
         else:
             w = np.random.randn(fan_out, fan_in)
         self.weights.append(w)
     self.biases = [np.random.randn(units, 1) for units in self.shape[1:]]
Ejemplo n.º 3
0
def SLFNN(vst_onlyTokens,
          dl_terms,
          dl_associations,
          vso,
          nbEpochs=100,
          batchSize=64):
    """Build and train the single-layer feed-forward (SLFNN) normalizer.

    Term vectors are built from token vectors, then a single dense layer
    (with bias) maps them onto the ontology space.

    Returns (trained model, vso, list of unknown tokens).
    """
    vstTerm, l_unknownToken = word2term.wordVST2TermVST(
        vst_onlyTokens, dl_terms)
    data, labels = getMatrix(dl_terms,
                             vstTerm,
                             dl_associations,
                             vso,
                             symbol="___")

    model = models.Sequential()
    # One dense projection: term-vector size -> ontology-space size.
    model.add(
        layers.Dense(units=labels.shape[1],
                     use_bias=True,
                     kernel_initializer=initializers.GlorotUniform(),
                     input_shape=(data.shape[1], )))
    model.summary()

    model.compile(
        optimizer=optimizers.Nadam(),
        loss=losses.LogCosh(),
        metrics=[metrics.CosineSimilarity(),
                 metrics.MeanSquaredError()])
    model.fit(data, labels, epochs=nbEpochs, batch_size=batchSize)

    return model, vso, l_unknownToken
Ejemplo n.º 4
0
	def __init__(self,fin,fout=1):
		"""Temporal attention: learns a (fin, fout) projection used to score inputs."""
		super(TemporalAttention,self).__init__()
		self.fin = fin # input feature dimension
		self.fout = fout # output dimension; 1 here because a single score is produced
		
		self.initializer = initializers.GlorotUniform() # weight-init distribution
		# Custom trainable parameter: Glorot-uniform (fin, fout) weight matrix.
		self.w = tf.Variable(self.initializer(shape=[self.fin, self.fout], dtype=tf.float32))
Ejemplo n.º 5
0
    def __init__(self,fout):
        """Decoder head: dense projection to `fout` features, with an explicit
        bias variable and a trainable scalar residual weight."""
        super(Decoder,self).__init__()

        self.fout = fout  # output feature dimension

        self.fc = layers.Dense(self.fout)
        self.resweight = tf.Variable(0.0,trainable=True)  # scalar residual weight, starts at 0
        self.initializer = initializers.GlorotUniform()
        # Separate trainable bias (the Dense above also carries its own bias).
        self.bias = tf.Variable(self.initializer(shape=[self.fout], dtype=tf.float32))
Ejemplo n.º 6
0
 def computeInitializer(cls, seed=None):
     """
     Compute a Glorot-uniform layer initializer.

     Parameters:
     - seed  -- seed for random generator used by analyzer
                If it is None a unique repeatable seed is generated
     """
     # `is not None` so an explicit seed of 0 is honoured rather than replaced.
     _seed = seed if seed is not None else UniqueSeed.getSeed()
     #_initializer=initializers.GlorotNormal(seed=seed)
     # Bug fix: the computed fallback `_seed` was previously discarded and the
     # raw `seed` argument (possibly None) was passed to the initializer.
     _initializer = initializers.GlorotUniform(seed=_seed)
     return _initializer
Ejemplo n.º 7
0
    def __init__(self, fout):
        """Graph-node block: bias-free dense projection + LeakyReLU, with a
        separate trainable bias variable."""
        super(GraphNodes, self).__init__()

        self.fout = fout  # output feature size (16 at the original call site)
        self.thresold = 1e-12  # eps: numerical threshold (attribute name kept as-is, sic)

        self.fc = layers.Dense(self.fout, use_bias=False)
        self.leakyrelu = layers.LeakyReLU(alpha=0.2)
        self.initializer = initializers.GlorotUniform()
        # Explicit Glorot-uniform bias (the Dense above deliberately has none).
        self.bias = tf.Variable(
            self.initializer(shape=[self.fout], dtype=tf.float32))
Ejemplo n.º 8
0
    def __init__(self, p):
        """Embedding network: a p-wide dense transform plus an extra trainable
        (1, p) parameter vector."""
        super(EmbeddingNetwork, self).__init__()

        self.p = p  # embedding width

        self.theta = layers.Dense(p, input_shape=(None, p))
        # Extra learned parameter, Glorot-uniform initialised.
        self.theta4 = tf.Variable(initializers.GlorotUniform()(shape=(1, p)),
                                  trainable=True,
                                  dtype=tf.float32)

        self.relu_for_outputs = layers.ReLU()
Ejemplo n.º 9
0
 def __init__(self,
              W_reg='l2',
              b_reg='l2',
              W_constraint='MinMaxNorm',
              b_constraint='MinMaxNorm',
              output_attention=False,
              **kwargs):
     """Attention layer configuration.

     Regularizers and constraints are resolved from their Keras string
     identifiers (already-built instances pass through `get` unchanged).
     output_attention -- presumably makes call() return the attention
     weights as well; confirm in the layer's call() implementation.
     """
     self.initializer = initializers.GlorotUniform()
     # regularizers.get / constraints.get accept a string id or a ready object.
     self.weight_regularizers = regularizers.get(W_reg)
     self.bias_regularizers = regularizers.get(b_reg)
     self.weight_constraint = constraints.get(W_constraint)
     self.bias_constraint = constraints.get(b_constraint)
     self.output_attention = output_attention
     # Force float32 regardless of any global dtype policy.
     super(Attention, self).__init__(dtype='float32', **kwargs)
    def __init__(self, p):
        """Structure2Vec cell with four parameter sets of width p.

        theta1 consumes 1-dim inputs, theta2/theta3 consume p-dim inputs,
        and theta4 is a raw trainable (1, p) variable.
        """
        super(Structure2Vec, self).__init__()

        self.p = p  # latent dimension

        self.theta1 = layers.Dense(p, input_shape=(None, 1))
        self.theta2 = layers.Dense(p, input_shape=(None, p))
        self.theta3 = layers.Dense(p, input_shape=(None, p))
        self.theta4 = tf.Variable(initializers.GlorotUniform()(shape=(1, p)),
                                  trainable=True,
                                  dtype=tf.float32)

        self.relu_for_unit4 = layers.ReLU()
        self.relu_for_outputs = layers.ReLU()
Ejemplo n.º 11
0
    def __init__(self, f_gcn, f_atten, channels=4):
        """EGCN layer: a (channels, f_atten) attention weight and an f_gcn-wide
        dense transform, each paired with a BatchNormalization layer."""
        super(EGCN, self).__init__()
        self.f_gcn = f_gcn  # width of the dense (GCN) transform
        self.f_atten = f_atten  # width of the attention features
        self.channels = channels  # first dimension of the attention weight

        # initialize custom parameters
        self.initializer = initializers.GlorotUniform()

        self.w_atten = tf.Variable(
            self.initializer(shape=[self.channels, self.f_atten],
                             dtype=tf.float32))  # trainable attention weight
        self.bn = layers.BatchNormalization()  # batch norm (usage defined in call())
        self.w = layers.Dense(self.f_gcn)  # dense / fc transform
        self.bn2 = layers.BatchNormalization()  # second batch norm
Ejemplo n.º 12
0
    def conv2d_layer(self, layer_metadata):
        """Build a Conv2D layer from a metadata dict.

        Expected keys: "initializer" ("xavier" | "random"), "regularizer"
        ("l1" | "l2" | None), "reg_ratio", "activation" ("relu" | "sigmoid" |
        "softmax" | "tanh"), "filters", "kernel_size", "strides", "padding",
        and optionally "batch_norm" (bool). When batch_norm is True the conv
        is built with activation=None (activation deferred until after
        normalization).

        Raises ValueError for an invalid initializer, regularizer or
        activation.
        """
        if layer_metadata["initializer"] == "xavier":
            initializer = initializers.GlorotUniform()
        elif layer_metadata["initializer"] == "random":
            initializer = initializers.RandomNormal()
        else:
            raise ValueError(
                "Specified initializer for {} is invalid: should be one of (xavier, random)"
                .format(layer_metadata))

        if layer_metadata["regularizer"] not in ("l1", "l2", None):
            raise ValueError(
                "Specified regularizer for {} is invalid: should be one of (l1, l2, None)"
                .format(layer_metadata))
        regularizer = self.get_regularizer(layer_metadata["regularizer"],
                                           layer_metadata["reg_ratio"])

        # Bug fix: the tuple previously contained the single fused string
        # "sigmoid, softmax", so valid "sigmoid"/"softmax" values were rejected.
        if layer_metadata["activation"] not in ("relu", "sigmoid", "softmax",
                                                "tanh"):
            # Bug fix: the message placeholder was never formatted.
            raise ValueError(
                "Activation specified for {} is invalid, should be one of (relu, sigmoid, softmax, tanh)"
                .format(layer_metadata))

        # With batch normalization the activation is deferred (applied later);
        # absent key or a falsy value means the activation is applied here.
        use_bn = layer_metadata.get("batch_norm") == True
        activation = None if use_bn else layer_metadata["activation"]

        return layers.Conv2D(filters=layer_metadata["filters"],
                             kernel_size=layer_metadata["kernel_size"],
                             strides=layer_metadata["strides"],
                             padding=layer_metadata["padding"],
                             data_format=self.data_format,
                             activation=activation,
                             kernel_initializer=initializer,
                             bias_initializer=initializer,
                             kernel_regularizer=regularizer,
                             bias_regularizer=regularizer)
Ejemplo n.º 13
0
    def cal_states_similarity(self, state):
        """Embed the two stacked states through a fixed random conv encoder and
        return the L2 distance between their embeddings.

        Assumes eager execution (uses .numpy()) and that `state` stacks the
        two states to compare along axis 0 — TODO confirm with the caller.
        """
        frames = tf.convert_to_tensor(state, dtype=np.float32)
        normalized = Lambda(lambda t: t / 255., name="input_normalizer")(frames)

        # Fixed seed (0) so the random projection is identical across calls.
        features = TimeDistributed(
            Conv2D(filters=32,
                   kernel_size=6,
                   strides=6,
                   kernel_initializer=initializers.GlorotUniform(0),
                   input_shape=normalized.shape))(normalized)
        features = LeakyReLU(0.01)(features)
        features = TimeDistributed(MaxPooling2D())(features)

        flat = Flatten()(features).numpy()

        return np.linalg.norm(flat[0, :] - flat[1, :])
Ejemplo n.º 14
0
 def __init__(self, n_head, f_in, f_out, attn_dropout, bias=True):
     """Batched multi-head graph attention: per-head projection and attention
     parameters, LeakyReLU + softmax + dropout, and an optional bias."""
     super(BatchMultiHeadGraphAttention, self).__init__()
     self.n_head = n_head  # number of attention heads
     self.f_in = f_in  # input feature size
     self.f_out = f_out  # output feature size
     self.attn_dropout = attn_dropout  # dropout rate on attention weights
     self.add_self_loop = True  # guard against nodes that have no neighbours
     self.initializer = initializers.GlorotUniform()  # weight-init distribution
     # Per-head projection weight: (n_head, f_in, f_out).
     self.w = tf.Variable(
         self.initializer(shape=[self.n_head, self.f_in, self.f_out],
                          dtype=tf.float32))
     self.adj = []
     # Per-head attention parameters over concatenated pairs: (n_head, 2*f_out, 1).
     self.fc = tf.Variable(
         self.initializer(shape=[self.n_head, 2 * self.f_out, 1],
                          dtype=tf.float32))
     self.leaky_relu = layers.LeakyReLU(alpha=0.2)  # activation
     self.softmax = layers.Softmax(axis=-1)  # attention normalisation
     self.dropout = layers.Dropout(rate=self.attn_dropout)  # dropout layer
     # NOTE(review): when bias=False, self.bias is never created — callers
     # presumably check for the attribute; confirm before relying on it.
     if bias:
         self.bias = tf.Variable(tf.zeros(self.f_out))  # trainable bias
Ejemplo n.º 15
0
    def __init__(self, n_head, f_in, f_out, attn_dropout, bias=True):
        """Multi-head graph attention: per-head projection weights plus
        separate source/destination attention vectors."""
        super(MultiHeadGraphAttention, self).__init__()
        self.n_head = n_head  # number of attention heads
        self.f_in = f_in  # input feature size
        self.f_out = f_out  # output feature size
        self.isbias = bias  # whether a bias variable is created
        self.initializer = initializers.GlorotUniform()  # weight-init distribution
        # Per-head projection weight: (n_head, f_in, f_out).
        self.w = tf.Variable(
            self.initializer(shape=[self.n_head, self.f_in, self.f_out],
                             dtype=tf.float32))
        # Attention vectors for source nodes: (n_head, f_out, 1).
        self.a_src = tf.Variable(
            self.initializer(shape=[self.n_head, self.f_out, 1],
                             dtype=tf.float32))
        # Attention vectors for destination nodes: (n_head, f_out, 1).
        self.a_dst = tf.Variable(
            self.initializer(shape=[self.n_head, self.f_out, 1],
                             dtype=tf.float32))

        self.leaky_relu = layers.LeakyReLU(alpha=0.2)  # activation
        self.softmax = layers.Softmax(axis=-1)  # attention normalisation
        self.dropout = layers.Dropout(rate=attn_dropout)  # dropout layer
        if self.isbias:
            self.bias = tf.Variable(tf.zeros(self.f_out))  # trainable bias
Ejemplo n.º 16
0
def CNorm(vst_onlyTokens,
          dl_terms,
          dl_associations,
          vso,
          nbEpochs=30,
          batchSize=64,
          l_numberOfFilters=[4000],
          l_filterSizes=[1],
          phraseMaxSize=15):
    """Build and train the CNorm model: the element-wise average of a linear
    projection (SLFNN) and a shallow CNN (S-CNN), trained jointly.

    Parameters mirror SCNN/SLFNN. Returns (trained model, vso, unknown
    tokens). NOTE(review): mutable list defaults kept for interface
    compatibility; they are only read, never mutated.
    """
    # Preparing data for SLFNN and S-CNN components:
    dataSCNN, labels, l_unkownTokens, l_uncompleteExpressions = prepare2D_data(
        vst_onlyTokens, dl_terms, dl_associations, vso, phraseMaxSize)

    # SLFNN input: mean of the non-zero token embeddings of each phrase.
    # Zero rows contribute nothing to the sum, so summing over all tokens
    # equals summing over the non-zero ones; only the divisor needs the
    # non-zero count. Rows with no non-zero token stay all-zero, as before.
    dataSLFNN = dataSCNN.sum(axis=1).astype(numpy.float64)
    tokenCounts = numpy.any(dataSCNN, axis=2).sum(axis=1)
    nonEmpty = tokenCounts > 0
    dataSLFNN[nonEmpty] /= tokenCounts[nonEmpty, None]

    # Input layers:
    inputLP = Input(shape=dataSLFNN.shape[1])
    inputCNN = Input(shape=[dataSCNN.shape[1], dataSCNN.shape[2]])

    # SLFNN component: a single dense projection onto the ontology space.
    ontoSpaceSize = labels.shape[2]
    denseLP = layers.Dense(
        units=ontoSpaceSize,
        use_bias=True,
        kernel_initializer=initializers.GlorotUniform())(inputLP)
    modelLP = Model(inputs=inputLP, outputs=denseLP)

    # Shallow-CNN component: one conv/pool/LeakyReLU branch per filter size.
    l_subLayers = list()
    for i, filterSize in enumerate(l_filterSizes):

        convLayer = (layers.Conv1D(
            l_numberOfFilters[i],
            filterSize,
            strides=1,
            kernel_initializer=initializers.GlorotUniform()))(inputCNN)

        # Pool over every valid conv position (phraseMaxSize - filterSize + 1).
        outputSize = phraseMaxSize - filterSize + 1
        pool = (layers.MaxPool1D(pool_size=outputSize))(convLayer)

        activationLayer = (layers.LeakyReLU(alpha=0.3))(pool)

        l_subLayers.append(activationLayer)

    if len(l_filterSizes) > 1:
        concatenateLayer = (layers.Concatenate(axis=-1))(
            l_subLayers)  # axis=-1 // concatenating on the last dimension
    else:
        concatenateLayer = l_subLayers[0]

    denseLayer = layers.Dense(
        ontoSpaceSize,
        kernel_initializer=initializers.GlorotUniform())(concatenateLayer)
    modelCNN = Model(inputs=inputCNN, outputs=denseLayer)

    # Bug fix: a leftover Sequential wrapper around the conv stem (built here
    # but never compiled, trained or returned) has been removed as dead code.

    # Combination of the two components:
    combinedLayer = layers.average([modelLP.output, modelCNN.output])
    fullModel = Model(inputs=[inputLP, inputCNN], outputs=combinedLayer)
    fullModel.summary()

    # Compile and train:
    fullModel.compile(
        optimizer=optimizers.Nadam(),
        loss=losses.LogCosh(),
        metrics=[metrics.CosineSimilarity(),
                 metrics.MeanSquaredError()])
    fullModel.fit([dataSLFNN, dataSCNN],
                  labels,
                  epochs=nbEpochs,
                  batch_size=batchSize)

    return fullModel, vso, l_unkownTokens
Ejemplo n.º 17
0
# Inverse-frequency class weights so the rarer mask class contributes as much
# to the loss as the unmasked class: weight_c = (1 / count_c) * total / 2.
unmask_count = total - mask_count
print(total, unmask_count, mask_count)
weight_for_0 = (1 / unmask_count) * (total) / 2.0
weight_for_1 = (1 / mask_count) * (total) / 2.0

class_weights = {0: weight_for_0, 1: weight_for_1}
print("Done, class_weights: ", class_weights)

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Dropout
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import initializers

# Glorot-uniform (Xavier) initializer shared by the conv layers below.
initializer = initializers.GlorotUniform()

print("Compiling ML models")
model = Sequential()

# First conv block: 32 3x3 same-padded filters, then ReLU and 2x2 max-pooling.
# `data` is defined earlier in the script; data.shape[1:] is assumed to be the
# per-sample (H, W, C) shape — TODO confirm where `data` is built.
model.add(
    Conv2D(32, (3, 3),
           input_shape=data.shape[1:],
           kernel_initializer=initializer,
           padding="same"))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
#The first CNN layer followed by Relu and MaxPooling layers

# Second conv layer: 64 3x3 same-padded filters with ReLU.
model.add(Conv2D(64, (3, 3), kernel_initializer=initializer, padding="same"))
model.add(Activation('relu'))
Ejemplo n.º 18
0
    def __init__(self,
                 name=None,
                 d_model=512,
                 d_proj=64,
                 n_heads=8,
                 use_bias=False,
                 qkvw_init_scale=[1, 1, 1, 1],
                 apply_softmax=True,
                 sparse_pattern=None,
                 sparse_block_size=16,
                 trainable=True,
                 relative_attention_type=False,
                 shape_2d=None,
                 share_pe_heads=True,
                 pe_initialization='uniform',
                 cls_token=True,
                 q_initializer='uniform',
                 k_initializer='uniform',
                 v_initializer='uniform',
                 o_initializer='uniform'):
        """Multi-head attention block.

        Builds q/k/v projections of width d_proj * n_heads and an output
        projection of width d_model, plus either dense scaled-dot-product
        attention (sparse_pattern is None) or a block-sparse variant.

        q/k/v/o_initializer accept 'uniform' | 'zeros' | 'identity'; any
        other value is passed to Dense unchanged. Each projection's
        initializer is scaled by the matching qkvw_init_scale entry via
        InitializerScaler.
        NOTE(review): the mutable list default for qkvw_init_scale is kept
        for interface compatibility; it is only indexed, never mutated.
        """
        super(MultiheadAttention, self).__init__(name=name)
        self.d_model = d_model
        self.d_proj = d_proj
        self.n_heads = n_heads
        self.sparse_pattern = sparse_pattern

        if sparse_pattern is None:
            self.scaled_dot_product_attention = ScaledDotProductAttention(
                name=self.name + '_attention',
                apply_softmax=apply_softmax,
                relative_attention_type=relative_attention_type,
                shape_2d=shape_2d,
                share_pe_heads=share_pe_heads,
                pe_initialization=pe_initialization,
                cls_token=cls_token)
        else:
            self.scaled_dot_product_attention = BlockSparseProductAttention(
                name=self.name + '_block_sparse_attention',
                block_size=sparse_block_size,
                sparse_pattern=sparse_pattern,
                apply_softmax=apply_softmax)

        def _resolve(initializer):
            # Replaces four copy-pasted if/elif chains; unrecognized values
            # pass through unchanged, exactly as before.
            if initializer == 'uniform':
                return tfki.GlorotUniform()
            if initializer == 'zeros':
                return tfki.Zeros()
            if initializer == 'identity':
                return tfki.Identity()
            return initializer

        def _proj(units, suffix, initializer, scale):
            # One scaled-initializer Dense projection.
            return tf.keras.layers.Dense(
                units,
                use_bias=use_bias,
                name=self.name + suffix,
                kernel_initializer=InitializerScaler(_resolve(initializer),
                                                     scale),
                trainable=trainable)

        # Creation order (wq, wk, wv, wo) preserved from the original.
        self.wq = _proj(d_proj * n_heads, '_wq', q_initializer,
                        qkvw_init_scale[0])
        self.wk = _proj(d_proj * n_heads, '_wk', k_initializer,
                        qkvw_init_scale[1])
        self.wv = _proj(d_proj * n_heads, '_wv', v_initializer,
                        qkvw_init_scale[2])
        self.wo = _proj(d_model, '_wo', o_initializer, qkvw_init_scale[3])
Ejemplo n.º 19
0
    def __init__(self,
                 p_fun,
                 loss_weights=(0.5, 0.5),
                 n_features=1,
                 n_labels=1,
                 hidden_layers=None,
                 metric='mae',
                 initializer=None,
                 optimizer=None,
                 learning_rate=0.01,
                 history=None,
                 kernel_reg_rate=0.0,
                 kernel_reg_power=1,
                 bias_reg_rate=0.0,
                 bias_reg_power=1,
                 feature_names=None,
                 output_names=None):
        """
        Parameters
        ----------
        p_fun : function
            Physics function to guide the neural network loss function.
            This function must take (y_predicted, y_true, p, **p_kwargs)
            as arguments with datatypes (tf.Tensor, np.ndarray, np.ndarray).
            The function must return a tf.Tensor object with a single numeric
            loss value (output.ndim == 0).
        loss_weights : tuple, optional
            Loss weights for the neural network y_predicted vs. y_true
            and for the p_fun loss, respectively. For example,
            loss_weights=(0.0, 1.0) would simplify the phygnn loss function
            to just the p_fun output.
        n_features : int, optional
            Number of input features.
        n_labels : int, optional
            Number of output labels.
        hidden_layers : list, optional
            List of dictionaries of key word arguments for each hidden
            layer in the NN. Dense linear layers can be input with their
            activations or separately for more explicit control over the layer
            ordering. For example, this is a valid input for hidden_layers that
            will yield 7 hidden layers (9 layers total):
                [{'units': 64, 'activation': 'relu', 'dropout': 0.01},
                 {'units': 64},
                 {'batch_normalization': {'axis': -1}},
                 {'activation': 'relu'},
                 {'dropout': 0.01}]
        metric : str, optional
            Loss metric option for the NN loss function (not the physical
            loss function). Must be a valid key in phygnn.loss_metrics.METRICS
        initializer : tensorflow.keras.initializers, optional
            Instantiated initializer object. None defaults to GlorotUniform
        optimizer : tensorflow.keras.optimizers, optional
            Instantiated neural network optimization object.
            None defaults to Adam.
        learning_rate : float, optional
            Optimizer learning rate.
        history : None | pd.DataFrame, optional
            Learning history if continuing a training session.
        kernel_reg_rate : float, optional
            Kernel regularization rate. Increasing this value above zero will
            add a structural loss term to the loss function that
            disincentivizes large hidden layer weights and should reduce
            model complexity. Setting this to 0.0 will disable kernel
            regularization.
        kernel_reg_power : int, optional
            Kernel regularization power. kernel_reg_power=1 is L1
            regularization (lasso regression), and kernel_reg_power=2 is L2
            regularization (ridge regression).
        bias_reg_rate : float, optional
            Bias regularization rate. Increasing this value above zero will
            add a structural loss term to the loss function that
            disincentivizes large hidden layer biases and should reduce
            model complexity. Setting this to 0.0 will disable bias
            regularization.
        bias_reg_power : int, optional
            Bias regularization power. bias_reg_power=1 is L1
            regularization (lasso regression), and bias_reg_power=2 is L2
            regularization (ridge regression).
        feature_names : list | tuple | None, optional
            Training feature names (strings). Mostly a convenience so that a
            loaded-from-disk model will have declared feature names, making it
            easier to feed in features for prediction. This will also get set
            if phygnn is trained on a DataFrame.
        output_names : list | tuple | None, optional
            Prediction output names (strings). Mostly a convenience so that a
            loaded-from-disk model will have declared output names, making it
            easier to understand prediction output. This will also get set
            if phygnn is trained on a DataFrame.
        """
        self._p_fun = p_fun
        # Loss weights are set (and presumably validated) by set_loss_weights below.
        self._loss_weights = None
        self._metric = metric
        self._input_dims = n_features
        self._output_dims = n_labels
        self._layers = Layers(n_features,
                              n_labels=n_labels,
                              hidden_layers=hidden_layers)
        self._optimizer = None
        self._history = history
        self._learning_rate = learning_rate
        self.kernel_reg_rate = kernel_reg_rate
        self.kernel_reg_power = kernel_reg_power
        self.bias_reg_rate = bias_reg_rate
        self.bias_reg_power = bias_reg_power
        self.feature_names = feature_names
        self.output_names = output_names

        self.set_loss_weights(loss_weights)

        # Metric names are matched case-insensitively against METRICS.
        if self._metric.lower() not in METRICS:
            e = ('Could not recognize error metric "{}". The following error '
                 'metrics are available: {}'.format(self._metric,
                                                    list(METRICS.keys())))
            logger.error(e)
            raise KeyError(e)
        else:
            self._metric_fun = METRICS[self._metric.lower()]

        # Defaults documented above: GlorotUniform initializer, Adam optimizer.
        self._initializer = initializer
        if initializer is None:
            self._initializer = initializers.GlorotUniform()

        self._optimizer = optimizer
        if optimizer is None:
            self._optimizer = optimizers.Adam(learning_rate=learning_rate)
Ejemplo n.º 20
0
import tensorflow as tf
import tensorflow.keras.layers as kl
import tensorflow.keras.initializers as inits
import numpy as np
from tensorflow.keras.regularizers import l2

initialize_relu = inits.VarianceScaling(scale=1./3., mode="fan_in", distribution="uniform")  # conserves activation std for layers with relu activation
initialize_tanh = inits.GlorotUniform()  # the standard tf.keras.layers.Dense initializer; conserves activation std for layers with tanh activation

class Actor(tf.keras.Model):
    """Deterministic actor network: two ReLU hidden layers and a tanh output
    scaled to the action range."""

    def __init__(self, state_dim, action_dim, max_action, ac_layers, reg_coeff):
        super(Actor, self).__init__()

        penalty = l2(reg_coeff)
        self.l1 = kl.Dense(ac_layers[0], activation='relu',
                           kernel_initializer=initialize_relu,
                           kernel_regularizer=penalty)
        self.l2 = kl.Dense(ac_layers[1], activation='relu',
                           kernel_initializer=initialize_relu,
                           kernel_regularizer=penalty)
        self.l3 = kl.Dense(action_dim, activation='tanh',
                           kernel_initializer=initialize_tanh,
                           kernel_regularizer=penalty)

        self._max_action = max_action
        # Build eagerly so the weights exist immediately (e.g. for copying the
        # model); otherwise they would only appear on the first call.
        self.build(input_shape=(None, state_dim))

    def call(self, state):
        assert state.dtype == tf.float32
        hidden = self.l2(self.l1(state))
        return self._max_action * self.l3(hidden)
Ejemplo n.º 21
0
    def __init__(self,
                 p_fun,
                 loss_weights=(0.5, 0.5),
                 n_features=1,
                 n_labels=1,
                 hidden_layers=None,
                 input_layer=None,
                 output_layer=None,
                 layers_obj=None,
                 metric='mae',
                 initializer=None,
                 optimizer=None,
                 learning_rate=0.01,
                 history=None,
                 kernel_reg_rate=0.0,
                 kernel_reg_power=1,
                 bias_reg_rate=0.0,
                 bias_reg_power=1,
                 feature_names=None,
                 output_names=None,
                 name=None,
                 version_record=None):
        """
        Parameters
        ----------
        p_fun : function
            Physics function to guide the neural network loss function.
            This fun must take (phygnn, y_true, y_predicted, p, **p_kwargs)
            as arguments with datatypes (PhysicsGuidedNeuralNetwork, tf.Tensor,
            np.ndarray, np.ndarray). The function must return a tf.Tensor
            object with a single numeric loss value (output.ndim == 0).
        loss_weights : tuple, optional
            Loss weights for the neural network y_true vs. y_predicted
            and for the p_fun loss, respectively. For example,
            loss_weights=(0.0, 1.0) would simplify the phygnn loss function
            to just the p_fun output.
        n_features : int, optional
            Number of input features. This should match the last dimension
            of the feature training data.
        n_labels : int, optional
            Number of output labels. This should match the last dimension
            of the label training data.
        hidden_layers : list, optional
            List of dictionaries of key word arguments for each hidden
            layer in the NN. Dense linear layers can be input with their
            activations or separately for more explicit control over the layer
            ordering. For example, this is a valid input for hidden_layers that
            will yield 8 hidden layers (10 layers including input+output):
                [{'units': 64, 'activation': 'relu', 'dropout': 0.01},
                 {'units': 64},
                 {'batch_normalization': {'axis': -1}},
                 {'activation': 'relu'},
                 {'dropout': 0.01},
                 {'class': 'Flatten'},
                 ]
        input_layer : None | bool | dict
            Input layer. specification. Can be a dictionary similar to
            hidden_layers specifying a dense / conv / lstm layer.  Will
            default to a keras InputLayer with input shape = n_features.
            Can be False if the input layer will be included in the
            hidden_layers input.
        output_layer : None | bool | list | dict
            Output layer specification. Can be a list/dict similar to
            hidden_layers input specifying a dense layer with activation.
            For example, for a classfication problem with a single output,
            output_layer should be [{'units': 1}, {'activation': 'sigmoid'}].
            This defaults to a single dense layer with no activation
            (best for regression problems).  Can be False if the output layer
            will be included in the hidden_layers input.
        layers_obj : None | phygnn.utilities.tf_layers.Layers
            Optional initialized Layers object to set as the model layers
            including pre-set weights. This option will override the
            hidden_layers, input_layer, and output_layer arguments.
        metric : str, optional
            Loss metric option for the NN loss function (not the physical
            loss function). Must be a valid key in phygnn.loss_metrics.METRICS
            or a method in tensorflow.keras.losses that takes
            (y_true, y_predicted) as arguments.
        initializer : tensorflow.keras.initializers, optional
            Instantiated initializer object. None defaults to GlorotUniform
        optimizer : tensorflow.keras.optimizers | dict | None
            Instantiated tf.keras.optimizers object or a dict optimizer config
            from tf.keras.optimizers.get_config(). None defaults to Adam.
        learning_rate : float, optional
            Optimizer learning rate. Not used if optimizer input arg is a
            pre-initialized object or if optimizer input arg is a config dict.
        history : None | pd.DataFrame, optional
            Learning history if continuing a training session.
        kernel_reg_rate : float, optional
            Kernel regularization rate. Increasing this value above zero will
            add a structural loss term to the loss function that
            disincentivizes large hidden layer weights and should reduce
            model complexity. Setting this to 0.0 will disable kernel
            regularization.
        kernel_reg_power : int, optional
            Kernel regularization power. kernel_reg_power=1 is L1
            regularization (lasso regression), and kernel_reg_power=2 is L2
            regularization (ridge regression).
        bias_reg_rate : float, optional
            Bias regularization rate. Increasing this value above zero will
            add a structural loss term to the loss function that
            disincentivizes large hidden layer biases and should reduce
            model complexity. Setting this to 0.0 will disable bias
            regularization.
        bias_reg_power : int, optional
            Bias regularization power. bias_reg_power=1 is L1
            regularization (lasso regression), and bias_reg_power=2 is L2
            regularization (ridge regression).
        feature_names : list | tuple | None, optional
            Training feature names (strings). Mostly a convenience so that a
            loaded-from-disk model will have declared feature names, making it
            easier to feed in features for prediction. This will also get set
            if phygnn is trained on a DataFrame.
        output_names : list | tuple | None, optional
            Prediction output names (strings). Mostly a convenience so that a
            loaded-from-disk model will have declared output names, making it
            easier to understand prediction output. This will also get set
            if phygnn is trained on a DataFrame.
        name : None | str
            Optional model name for debugging.
        version_record : dict | None
            Optional record of import package versions. None (default) will
            save active environment versions. A dictionary will be interpreted
            as versions from a loaded model and will be saved as an attribute.
        """

        super().__init__(n_features=n_features,
                         n_labels=n_labels,
                         hidden_layers=hidden_layers,
                         input_layer=input_layer,
                         output_layer=output_layer,
                         layers_obj=layers_obj,
                         feature_names=feature_names,
                         output_names=output_names,
                         version_record=version_record)

        self._p_fun = p_fun if p_fun is not None else self.p_fun_dummy
        self._loss_weights = None
        self._metric = metric
        self._optimizer = None
        self._history = history
        self._learning_rate = learning_rate
        self.kernel_reg_rate = kernel_reg_rate
        self.kernel_reg_power = kernel_reg_power
        self.bias_reg_rate = bias_reg_rate
        self.bias_reg_power = bias_reg_power
        self.name = name if isinstance(name, str) else 'phygnn'

        self.set_loss_weights(loss_weights)

        if self._metric.lower() in METRICS:
            self._metric_fun = METRICS[self._metric.lower()]
        else:
            try:
                self._metric_fun = getattr(tf.keras.losses, self._metric)
            except Exception as e:
                msg = ('Could not recognize error metric "{}". The following '
                       'error metrics are available: {}'.format(
                           self._metric, list(METRICS.keys())))
                logger.error(msg)
                raise KeyError(msg) from e

        self._initializer = initializer
        if initializer is None:
            self._initializer = initializers.GlorotUniform()

        self._optimizer = optimizer
        if isinstance(optimizer, dict):
            class_name = optimizer['name']
            OptimizerClass = getattr(optimizers, class_name)
            self._optimizer = OptimizerClass.from_config(optimizer)
        elif optimizer is None:
            self._optimizer = optimizers.Adam(learning_rate=learning_rate)
Ejemplo n.º 22
0
class WindowAttention(layers.Layer):
    r""" Window based multi-head self attention (W-MSA) module with relative position bias.
    It supports both of shifted and non-shifted window.

    Args:
        dim (int): Number of input channels.
        window_size (tuple[int]): The height and width of the window.
        num_heads (int): Number of attention heads.
        qkv_bias (bool, optional):  If True, add a learnable bias to query, key, value. Default: True
        attn_drop_ratio (float, optional): Dropout ratio of attention weight. Default: 0.0
        proj_drop_ratio (float, optional): Dropout ratio of output. Default: 0.0
        name (str, optional): Layer name passed to the keras base Layer. Default: None
    """

    # NOTE(review): these initializers are class attributes, so every
    # WindowAttention instance shares the same initializer objects (the qkv
    # and proj kernels of different instances still get independent draws,
    # since the initializer is called once per weight).
    k_ini = initializers.GlorotUniform()
    b_ini = initializers.Zeros()

    def __init__(self,
                 dim,
                 window_size,
                 num_heads=8,
                 qkv_bias=False,
                 attn_drop_ratio=0.,
                 proj_drop_ratio=0.,
                 name=None):
        super(WindowAttention, self).__init__(name=name)
        self.dim = dim
        self.window_size = window_size  # [Mh, Mw]
        self.num_heads = num_heads
        head_dim = dim // num_heads
        # 1/sqrt(d_k) scaling applied to the q·k^T scores below
        self.scale = head_dim**-0.5

        # Single dense layer producing q, k and v concatenated (dim * 3)
        self.qkv = layers.Dense(dim * 3,
                                use_bias=qkv_bias,
                                name="qkv",
                                kernel_initializer=self.k_ini,
                                bias_initializer=self.b_ini)
        self.attn_drop = layers.Dropout(attn_drop_ratio)
        self.proj = layers.Dense(dim,
                                 name="proj",
                                 kernel_initializer=self.k_ini,
                                 bias_initializer=self.b_ini)
        self.proj_drop = layers.Dropout(proj_drop_ratio)

    def build(self, input_shape):
        # define a parameter table of relative position bias
        # [2*Mh-1 * 2*Mw-1, nH]
        self.relative_position_bias_table = self.add_weight(
            shape=[
                (2 * self.window_size[0] - 1) * (2 * self.window_size[1] - 1),
                self.num_heads
            ],
            initializer=initializers.TruncatedNormal(stddev=0.02),
            trainable=True,
            dtype=tf.float32,
            name="relative_position_bias_table")

        # Precompute, for every pair of positions inside a window, the index
        # into the bias table above.  Computed once in numpy at build time
        # and stored as a non-trainable variable.
        coords_h = np.arange(self.window_size[0])
        coords_w = np.arange(self.window_size[1])
        coords = np.stack(np.meshgrid(coords_h, coords_w,
                                      indexing="ij"))  # [2, Mh, Mw]
        coords_flatten = np.reshape(coords, [2, -1])  # [2, Mh*Mw]
        # [2, Mh*Mw, 1] - [2, 1, Mh*Mw]
        relative_coords = coords_flatten[:, :,
                                         None] - coords_flatten[:,
                                                                None, :]  # [2, Mh*Mw, Mh*Mw]
        relative_coords = np.transpose(relative_coords,
                                       [1, 2, 0])  # [Mh*Mw, Mh*Mw, 2]
        relative_coords[:, :,
                        0] += self.window_size[0] - 1  # shift to start from 0
        relative_coords[:, :, 1] += self.window_size[1] - 1
        # Row-major flattening: row offset is scaled by the number of
        # distinct column offsets (2*Mw - 1) before summing
        relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
        relative_position_index = relative_coords.sum(-1)  # [Mh*Mw, Mh*Mw]

        self.relative_position_index = tf.Variable(
            tf.convert_to_tensor(relative_position_index),
            trainable=False,
            dtype=tf.int64,
            name="relative_position_index")

    def call(self, x, mask=None, training=None):
        """
        Args:
            x: input features with shape of (num_windows*B, Mh*Mw, C)
            mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
            training: whether training mode (forwarded to the Dropout layers)
        """
        # [batch_size*num_windows, Mh*Mw, total_embed_dim]
        # NOTE(review): x.shape gives static dimensions; B_ would be None
        # under a dynamic-batch trace, which would break tf.reshape below —
        # confirm callers always provide a fully defined input shape.
        B_, N, C = x.shape

        # qkv(): -> [batch_size*num_windows, Mh*Mw, 3 * total_embed_dim]
        qkv = self.qkv(x)
        # reshape: -> [batch_size*num_windows, Mh*Mw, 3, num_heads, embed_dim_per_head]
        qkv = tf.reshape(qkv, [B_, N, 3, self.num_heads, C // self.num_heads])
        # transpose: -> [3, batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head]
        qkv = tf.transpose(qkv, [2, 0, 3, 1, 4])
        # [batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head]
        q, k, v = qkv[0], qkv[1], qkv[2]

        # transpose: -> [batch_size*num_windows, num_heads, embed_dim_per_head, Mh*Mw]
        # multiply -> [batch_size*num_windows, num_heads, Mh*Mw, Mh*Mw]
        attn = tf.matmul(a=q, b=k, transpose_b=True) * self.scale

        # relative_position_bias(reshape): [Mh*Mw*Mh*Mw,nH] -> [Mh*Mw,Mh*Mw,nH]
        relative_position_bias = tf.gather(
            self.relative_position_bias_table,
            tf.reshape(self.relative_position_index, [-1]))
        relative_position_bias = tf.reshape(relative_position_bias, [
            self.window_size[0] * self.window_size[1],
            self.window_size[0] * self.window_size[1], -1
        ])
        relative_position_bias = tf.transpose(relative_position_bias,
                                              [2, 0, 1])  # [nH, Mh*Mw, Mh*Mw]
        # Bias is broadcast over the batch*windows dimension
        attn = attn + tf.expand_dims(relative_position_bias, 0)

        if mask is not None:
            # mask: [nW, Mh*Mw, Mh*Mw]
            nW = mask.shape[0]  # num_windows
            # attn(reshape): [batch_size, num_windows, num_heads, Mh*Mw, Mh*Mw]
            # mask(expand_dim): [1, nW, 1, Mh*Mw, Mh*Mw]
            # Adding a 0/-inf mask before softmax zeroes out the masked
            # attention weights
            attn = tf.reshape(
                attn, [B_ // nW, nW, self.num_heads, N, N]) + tf.expand_dims(
                    tf.expand_dims(mask, 1), 0)
            attn = tf.reshape(attn, [-1, self.num_heads, N, N])

        attn = tf.nn.softmax(attn, axis=-1)
        attn = self.attn_drop(attn, training=training)

        # multiply -> [batch_size*num_windows, num_heads, Mh*Mw, embed_dim_per_head]
        x = tf.matmul(attn, v)
        # transpose: -> [batch_size*num_windows, Mh*Mw, num_heads, embed_dim_per_head]
        x = tf.transpose(x, [0, 2, 1, 3])
        # reshape: -> [batch_size*num_windows, Mh*Mw, total_embed_dim]
        x = tf.reshape(x, [B_, N, C])

        x = self.proj(x)
        x = self.proj_drop(x, training=training)
        return x
Ejemplo n.º 23
0
 def __init__(self, return_attention=False, **kwargs):
     """Construct the attention layer.

     Parameters
     ----------
     return_attention : bool, optional
         Flag stored on the layer; presumably makes the layer also return
         its attention weights (consumed elsewhere, not visible here).
     **kwargs
         Additional keyword arguments forwarded to the keras base Layer.
     """
     super(AttentionWeightedAverage, self).__init__(**kwargs)
     self.return_attention = return_attention
     self.supports_masking = True
     # Fixed seed so the attention kernel initialization is reproducible
     self.init = initializers.GlorotUniform(seed=101)