def __init__(self, queries, keys, num_heads=8, dropout_rate=0.5, training=True,
             causality=False, scope="multihead_attention"):
    '''Initialization.
    Args:
        queries: query tensor Q, a 3d tensor with shape of [N, T_q, d_model].
        keys: key tensor K, a 3d tensor with shape of [N, T_k, d_model].
        num_heads: number of attention heads.
        dropout_rate: dropout drop probability = 1 - keep_prob.
        training: whether in training mode; controls how the dropout layer is applied.
        causality: whether to mask future positions; used in the transformer decoder.
        scope: variable_scope for shared variables.
    '''
    # parent class initialization
    TFBaseLayer.__init__(self)
    # parameters of this layer
    self.queries = queries
    self.keys = keys
    self.num_heads = num_heads
    self.dropout_rate = dropout_rate
    self.training = training
    self.causality = causality
    self.scope = scope
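# A minimal TF 1.x sketch of the scaled dot-product multi-head attention that this
# constructor parameterizes. The helper name `multihead_attention_sketch` and its body
# are illustrative assumptions; the layer's actual build method is not shown here.
import tensorflow as tf

def multihead_attention_sketch(queries, keys, num_heads=8, dropout_rate=0.5,
                               training=True, causality=False,
                               scope="multihead_attention"):
    d_model = queries.get_shape().as_list()[-1]
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        # linear projections; values are derived from the keys here
        Q = tf.layers.dense(queries, d_model)                         # [N, T_q, d_model]
        K = tf.layers.dense(keys, d_model)                            # [N, T_k, d_model]
        V = tf.layers.dense(keys, d_model)                            # [N, T_k, d_model]
        # split into heads and stack the heads along the batch axis
        Q_ = tf.concat(tf.split(Q, num_heads, axis=2), axis=0)        # [N*h, T_q, d_model/h]
        K_ = tf.concat(tf.split(K, num_heads, axis=2), axis=0)
        V_ = tf.concat(tf.split(V, num_heads, axis=2), axis=0)
        # scaled dot-product attention scores
        scores = tf.matmul(Q_, K_, transpose_b=True)                  # [N*h, T_q, T_k]
        scores /= (d_model // num_heads) ** 0.5
        if causality:
            # lower-triangular mask: each position attends only to itself and earlier positions
            tril = tf.linalg.band_part(tf.ones_like(scores[0]), -1, 0)
            mask = tf.tile(tf.expand_dims(tril, 0), [tf.shape(scores)[0], 1, 1])
            scores = tf.where(tf.equal(mask, 0), tf.ones_like(scores) * (-2 ** 32 + 1.0), scores)
        weights = tf.nn.softmax(scores)                               # softmax over T_k
        weights = tf.layers.dropout(weights, rate=dropout_rate, training=training)
        out = tf.matmul(weights, V_)                                  # [N*h, T_q, d_model/h]
        # merge the heads back into the model dimension
        out = tf.concat(tf.split(out, num_heads, axis=0), axis=2)     # [N, T_q, d_model]
    return out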
def __init__(self, in_hidden, max_seq_len, filter_sizes, num_filters, training, scope="text_cnn"):
    '''TextCNN initialization.
    Args:
        in_hidden: input tensor, usually a batch of word embeddings.
        max_seq_len: maximum sequence length.
        filter_sizes: array of convolution window sizes; multiple window sizes can be convolved in parallel.
        num_filters: number of convolution filters.
        training: whether in training mode.
        scope: variable_scope for shared variables.
    '''
    # parent class initialization
    TFBaseLayer.__init__(self)
    # parameters
    self.in_hidden = in_hidden
    self.emb_size = self.in_hidden.get_shape()[-1]
    self.max_seq_len = max_seq_len
    self.filter_sizes = filter_sizes
    self.num_filters = num_filters
    self.training = training
    self.scope = scope
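# A minimal TF 1.x sketch of the TextCNN computation these parameters feed: one
# convolution per window size followed by max-pooling over time, with the pooled
# features concatenated. The helper name `text_cnn_sketch` is an assumption, not the
# repository's implementation.
import tensorflow as tf

def text_cnn_sketch(in_hidden, filter_sizes, num_filters, scope="text_cnn"):
    # in_hidden: [B, T, emb_size] word embeddings
    emb_size = in_hidden.get_shape().as_list()[-1]
    x = tf.expand_dims(in_hidden, -1)                              # [B, T, emb_size, 1]
    pooled = []
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        for size in filter_sizes:
            conv = tf.layers.conv2d(x, filters=num_filters,
                                    kernel_size=[size, emb_size],
                                    activation=tf.nn.relu,
                                    name="conv_%d" % size)         # [B, T-size+1, 1, num_filters]
            pool = tf.reduce_max(conv, axis=1)                     # max over time -> [B, 1, num_filters]
            pooled.append(tf.reshape(pool, [-1, num_filters]))
    # concatenate the features from every window size
    return tf.concat(pooled, axis=1)                               # [B, len(filter_sizes) * num_filters]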
def __init__(self, input_x, vocab_size, emb_size, keep_prob, training, zero_padding_emb=True,
             pretrain_word_vecs=None, word_emb_trainable=True, scope="word_embedding"):
    '''Initialization.
    Args:
        input_x: id (one-hot) representation of the word sequence, shape [B, T_w] or [B, T_s, T_w].
        vocab_size: used to initialize the embedding matrix when no pretrained vectors are given.
        emb_size: embedding dimension.
        keep_prob: dropout keep probability.
        training: whether in training mode.
        zero_padding_emb: whether the padding id uses an all-zero embedding.
        pretrain_word_vecs: pretrained word vectors.
        word_emb_trainable: whether the pretrained word vectors can be updated.
        scope: variable_scope for shared variables.
    '''
    TFBaseLayer.__init__(self)
    self.input_x = input_x
    self.vocab_size = vocab_size
    self.emb_size = emb_size
    self.zero_padding_emb = zero_padding_emb
    self.keep_prob = keep_prob
    self.training = training
    self.pretrain_word_vecs = pretrain_word_vecs
    self.word_emb_trainable = word_emb_trainable
    self.scope = scope
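# A minimal TF 1.x sketch of the embedding lookup these parameters describe. The helper
# name `word_embedding_sketch` and the way the padding row is zeroed are assumptions
# based on the parameter names; the repository's build method is not shown here.
import tensorflow as tf

def word_embedding_sketch(input_x, vocab_size, emb_size, zero_padding_emb=True,
                          pretrain_word_vecs=None, word_emb_trainable=True,
                          scope="word_embedding"):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        if pretrain_word_vecs is not None:
            # start from pretrained vectors; freeze them if word_emb_trainable is False
            emb_table = tf.get_variable("emb_table",
                                        initializer=pretrain_word_vecs,
                                        trainable=word_emb_trainable)
        else:
            emb_table = tf.get_variable("emb_table", shape=[vocab_size, emb_size])
        if zero_padding_emb:
            # replace row 0 with a constant zero vector so padding ids embed to zeros
            zero_row = tf.zeros([1, emb_size], dtype=emb_table.dtype)
            emb_table = tf.concat([zero_row, emb_table[1:, :]], axis=0)
        return tf.nn.embedding_lookup(emb_table, input_x)          # [..., T_w, emb_size]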
def __init__(self, in_hidden, hidden_sizes, attention_size, keep_prob, training=True,
             rnn_type="GRU", scope="bilstm_attention"):
    '''Bi-LSTM-ATTENTION initialization.
    Args:
        in_hidden: input layer.
        hidden_sizes: hidden size of each layer in the stacked BiLSTM.
        attention_size: width of the attention matrix.
        keep_prob: keep probability for dropout between stacked LSTM layers.
        training: whether in training mode.
        rnn_type: either LSTM or GRU.
        scope: variable_scope for shared variables.
    '''
    # parent class initialization
    TFBaseLayer.__init__(self)
    # parameters of this layer
    self.in_hidden = in_hidden
    self.hidden_sizes = hidden_sizes
    self.att_size = attention_size
    self.keep_prob = keep_prob
    self.training = training
    self.rnn_type = rnn_type
    self.scope = scope
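# A minimal TF 1.x sketch of the stacked bidirectional RNN plus additive attention
# pooling that these parameters describe. The helper name `bilstm_attention_sketch`
# and its body are assumptions; the repository's build method is not shown here.
import tensorflow as tf

def bilstm_attention_sketch(in_hidden, hidden_sizes, attention_size, keep_prob,
                            rnn_type="GRU", scope="bilstm_attention"):
    outputs = in_hidden                                                    # [B, T, D]
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        for i, size in enumerate(hidden_sizes):
            with tf.variable_scope("rnn_layer_%d" % i):
                if rnn_type == "GRU":
                    cell_fw, cell_bw = tf.nn.rnn_cell.GRUCell(size), tf.nn.rnn_cell.GRUCell(size)
                else:
                    cell_fw, cell_bw = tf.nn.rnn_cell.LSTMCell(size), tf.nn.rnn_cell.LSTMCell(size)
                # dropout on the outputs between stacked layers
                cell_fw = tf.nn.rnn_cell.DropoutWrapper(cell_fw, output_keep_prob=keep_prob)
                cell_bw = tf.nn.rnn_cell.DropoutWrapper(cell_bw, output_keep_prob=keep_prob)
                (fw, bw), _ = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, outputs,
                                                              dtype=tf.float32)
                outputs = tf.concat([fw, bw], axis=-1)                     # [B, T, 2*size]
        # additive attention pooling over the time dimension
        u = tf.layers.dense(outputs, attention_size, activation=tf.tanh)   # [B, T, A]
        scores = tf.squeeze(tf.layers.dense(u, 1), axis=-1)                # [B, T]
        alpha = tf.nn.softmax(scores)                                      # weights over T
        return tf.reduce_sum(tf.expand_dims(alpha, -1) * outputs, axis=1)  # [B, 2*size]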
def __init__(self, training, in_hidden, cls_num, cls_type, input_y, keep_prob,
             l2_reg_lambda, scope="classifier"):
    '''Initialization.
    Args:
        training: whether in training mode; the loss is returned in training mode.
        in_hidden: input layer.
        cls_num: number of classes.
        cls_type: multi-class-dense/multi-class-sparse/multi-label.
        input_y: labels, shape [batch, dense/sparse/one-hot labels].
        keep_prob: dropout keep probability.
        l2_reg_lambda: l2 regularization lambda.
        scope: variable_scope for shared variables.
    '''
    TFBaseLayer.__init__(self)
    self.training = training
    self.in_hidden = in_hidden
    self.hidden_size = in_hidden.get_shape()[-1]
    self.cls_num = cls_num
    self.cls_type = cls_type
    self.input_y = input_y
    self.keep_prob = keep_prob
    self.l2_reg_lambda = l2_reg_lambda
    self.scope = scope
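# A minimal TF 1.x sketch of a projection-plus-loss head driven by these parameters,
# switching the loss on cls_type. The helper name `classifier_sketch`, the exact
# meaning of "dense" vs "sparse" labels, and the regularization detail are assumptions.
import tensorflow as tf

def classifier_sketch(in_hidden, cls_num, cls_type, input_y, keep_prob,
                      l2_reg_lambda, training=True, scope="classifier"):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        h = tf.nn.dropout(in_hidden, keep_prob) if training else in_hidden
        logits = tf.layers.dense(h, cls_num, name="logits")
        if cls_type == "multi-label":
            # independent sigmoid per label; input_y is a multi-hot matrix
            probs = tf.nn.sigmoid(logits)
            loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.cast(input_y, tf.float32), logits=logits))
        elif cls_type == "multi-class-sparse":
            # input_y holds integer class ids
            probs = tf.nn.softmax(logits)
            loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=input_y, logits=logits))
        else:
            # "multi-class-dense": input_y is a one-hot (or soft) label matrix
            probs = tf.nn.softmax(logits)
            loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=input_y, logits=logits))
        # l2 regularization over the projection variables
        l2 = l2_reg_lambda * tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()
                                       if "logits" in v.name])
    return probs, loss + l2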
def __init__(self, in_hidden, epsilon=1e-8, scope="layer_normalization"):
    '''Initialization.
    Args:
        in_hidden: input layer.
        epsilon: small constant added to the denominator for numerical stability.
        scope: variable_scope for shared variables.
    '''
    # parent class initialization
    TFBaseLayer.__init__(self)
    # parameters of this layer
    self.in_hidden = in_hidden
    self.epsilon = epsilon
    self.scope = scope
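# A minimal TF 1.x sketch of layer normalization with these parameters: normalize over
# the last dimension, then rescale with learned gamma/beta. The helper name
# `layer_norm_sketch` is an assumption, not the repository's implementation.
import tensorflow as tf

def layer_norm_sketch(in_hidden, epsilon=1e-8, scope="layer_normalization"):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        params_shape = in_hidden.get_shape()[-1:]
        mean, variance = tf.nn.moments(in_hidden, axes=[-1], keep_dims=True)
        gamma = tf.get_variable("gamma", params_shape, initializer=tf.ones_initializer())
        beta = tf.get_variable("beta", params_shape, initializer=tf.zeros_initializer())
        normalized = (in_hidden - mean) / tf.sqrt(variance + epsilon)
        return gamma * normalized + beta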
def __init__(self, in_hidden, num_units=[2048, 512], scope="positionwise_feedforward"):
    '''Position-wise feed-forward net.
    Args:
        in_hidden: input layer, a 3d tensor with shape of [N, T, C].
        num_units: hidden sizes of the two fully connected layers, a list of two integers.
        scope: variable_scope for shared variables.
    '''
    # parent class initialization
    TFBaseLayer.__init__(self)
    # parameters of this layer
    self.in_hidden = in_hidden
    self.num_units = num_units
    self.scope = scope
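# A minimal TF 1.x sketch of the position-wise feed-forward computation: two dense
# layers applied independently at every position. The residual connection and the
# helper name `positionwise_ffn_sketch` are assumptions (the residual is standard in
# transformer blocks and requires num_units[1] == C).
import tensorflow as tf

def positionwise_ffn_sketch(in_hidden, num_units=[2048, 512], scope="positionwise_feedforward"):
    # in_hidden: [N, T, C]
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        inner = tf.layers.dense(in_hidden, num_units[0], activation=tf.nn.relu)  # expand
        outer = tf.layers.dense(inner, num_units[1])                             # project back
        return outer + in_hidden                                                 # residual connection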
def __init__(self, in_hidden, hidden_sizes, attention_size, keep_prob):
    '''Bi-LSTM-ATTENTION initialization.
    Args:
        in_hidden: input layer.
        hidden_sizes: hidden size of each layer in the stacked BiLSTM.
        attention_size: width of the attention matrix.
        keep_prob: keep probability for dropout between stacked LSTM layers.
    '''
    # parent class initialization
    TFBaseLayer.__init__(self)
    # parameters of this layer
    self.in_hidden = in_hidden
    self.hidden_sizes = hidden_sizes
    self.att_size = attention_size
    self.keep_prob = keep_prob
def __init__(self, in_hidden, max_seq_len, masking=True, scope="positional_encoding"):
    '''Positional encoding.
    Args:
        in_hidden: input layer, a 3d tensor with shape of [N, T, C].
        max_seq_len: maximum sequence length.
        masking: whether to mask the encoding at padded positions.
        scope: variable_scope for shared variables.
    '''
    # parent class initialization
    TFBaseLayer.__init__(self)
    # parameters of this layer
    self.in_hidden = in_hidden
    self.max_seq_len = max_seq_len
    self.masking = masking
    self.scope = scope
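# A worked sketch of the sinusoidal positional encoding table this layer adds to its
# [N, T, C] input, written in plain numpy. The function name and the sinusoidal form
# are assumptions based on the standard transformer formulation.
import numpy as np

def sinusoidal_position_encoding(max_seq_len, d_model):
    # PE[pos, 2i]   = sin(pos / 10000^(2i / d_model))
    # PE[pos, 2i+1] = cos(pos / 10000^(2i / d_model))
    pos = np.arange(max_seq_len)[:, np.newaxis]                    # [T, 1]
    i = np.arange(d_model)[np.newaxis, :]                          # [1, C]
    angle = pos / np.power(10000.0, (2 * (i // 2)) / d_model)      # [T, C]
    pe = np.zeros((max_seq_len, d_model))
    pe[:, 0::2] = np.sin(angle[:, 0::2])
    pe[:, 1::2] = np.cos(angle[:, 1::2])
    return pe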
def __init__(self, in_hidden, attention_size, training=True, scope="soft_attention"):
    '''Initialization.
    Args:
        in_hidden: hidden layer to be softly weighted.
        attention_size: width of the attention weight matrix.
        training: whether in training mode.
        scope: variable_scope for shared variables.
    '''
    # parent class initialization
    TFBaseLayer.__init__(self)
    # parameters of this layer
    self.in_hidden = in_hidden
    self.in_hidden_size = in_hidden.get_shape()[-1]
    self.attention_size = attention_size
    self.training = training
    self.scope = scope
def __init__(self, input_x, vocab_size, emb_size, pretrain_word_vecs=None, word_emb_trainable=True):
    '''Initialization.
    Args:
        input_x: id (one-hot) representation of the word sequence, shape [batch, wordid_list].
        vocab_size: used to initialize the embedding matrix when no pretrained vectors are given.
        emb_size: embedding dimension.
        pretrain_word_vecs: pretrained word vectors.
        word_emb_trainable: whether the pretrained word vectors can be updated.
    '''
    TFBaseLayer.__init__(self)
    self.input_x = input_x
    self.vocab_size = vocab_size
    self.emb_size = emb_size
    self.pretrain_word_vecs = pretrain_word_vecs
    self.word_emb_trainable = word_emb_trainable
def __init__(self, mode, in_hidden, cls_num, cls_type, input_y, keep_prob, l2_reg_lambda):
    '''Initialization.
    Args:
        mode: train or infer mode; the loss is returned in training mode.
        in_hidden: input layer.
        cls_num: number of classes.
        cls_type: multi-class-dense/multi-class-sparse/multi-label.
        input_y: labels, shape [batch, dense/sparse/one-hot labels].
        keep_prob: dropout keep probability.
        l2_reg_lambda: l2 regularization lambda.
    '''
    TFBaseLayer.__init__(self)
    self.mode = mode
    self.in_hidden = in_hidden
    self.hidden_size = in_hidden.get_shape()[-1]
    self.cls_num = cls_num
    self.cls_type = cls_type
    self.input_y = input_y
    self.keep_prob = keep_prob
    self.l2_reg_lambda = l2_reg_lambda