def __init__(self, process_cmd, d_model, dropout_rate, name):
    super(PrePostProcessLayer, self).__init__()
    self.process_cmd = process_cmd
    self.functors = []
    self.exec_order = ""
    for cmd in self.process_cmd:
        if cmd == "a":  # add residual connection
            self.functors.append(lambda x, y: x + y if y is not None else x)
            self.exec_order += "a"
        elif cmd == "n":  # add layer normalization
            self.functors.append(
                self.add_sublayer(
                    "layer_norm_%d" % len(
                        self.sublayers(include_sublayers=False)),
                    LayerNorm(
                        normalized_shape=d_model,
                        param_attr=fluid.ParamAttr(
                            name=name + "_layer_norm_scale",
                            initializer=fluid.initializer.Constant(1.)),
                        bias_attr=fluid.ParamAttr(
                            name=name + "_layer_norm_bias",
                            initializer=fluid.initializer.Constant(0.)))))
            self.exec_order += "n"
        elif cmd == "d":  # add dropout
            if dropout_rate:
                self.functors.append(lambda x: fluid.layers.dropout(
                    x, dropout_prob=dropout_rate, is_test=False))
                self.exec_order += "d"
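# The forward pass that consumes functors/exec_order is not shown above; a
# minimal sketch, assuming the layer is called like a dygraph Layer. Note why
# exec_order exists: it records only the functors actually registered ("d" is
# skipped entirely when dropout_rate is falsy), keeping the command string and
# the functor list in sync:
def forward(self, x, residual=None):
    for i, cmd in enumerate(self.exec_order):
        if cmd == "a":  # residual add takes the extra input
            x = self.functors[i](x, residual)
        else:  # "n" and "d" functors take a single input
            x = self.functors[i](x)
    return x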
def __init__(self, d_model, process_cmd, shape_len=None):
    super(PrePostProcessLayer, self).__init__()
    for cmd in process_cmd:
        if cmd == "n":  # add layer normalization
            self._layer_norm = LayerNorm(
                normalized_shape=d_model,
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Constant(1.)),
                bias_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Constant(0.)))
def __init__(self, name_scope, process_cmd, shape_len=None):
    super(PrePostProcessLayer, self).__init__(name_scope)
    for cmd in process_cmd:
        if cmd == "n":  # add layer normalization
            self._layer_norm = LayerNorm(
                name_scope=self.full_name(),
                begin_norm_axis=shape_len - 1,
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Constant(1.)),
                bias_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Constant(0.)))
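# In the variant above, begin_norm_axis = shape_len - 1 normalizes over only
# the trailing axis. A minimal NumPy sketch of that computation (shapes and
# names are illustrative, assuming a [batch, seq_len, d_model] input with
# shape_len=3):
import numpy as np

def layer_norm_last_axis(x, scale, bias, eps=1e-5):
    # per-position mean/variance over the d_model features
    mean = x.mean(axis=-1, keepdims=True)
    var = x.var(axis=-1, keepdims=True)
    return scale * (x - mean) / np.sqrt(var + eps) + bias

x = np.random.rand(2, 4, 8)  # [batch, seq_len, d_model]
out = layer_norm_last_axis(x, np.ones(8), np.zeros(8))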
def __init__(self, process_cmd, d_model, dropout_rate):
    super(PrePostProcessLayer, self).__init__()
    self.process_cmd = process_cmd
    self.functors = []
    for cmd in self.process_cmd:
        if cmd == "a":  # add residual connection
            self.functors.append(lambda x, y: x + y if y is not None else x)
        elif cmd == "n":  # add layer normalization
            self.functors.append(
                self.add_sublayer(
                    "layer_norm_%d" % len(
                        [layer for layer in self.children()]),
                    LayerNorm(
                        normalized_shape=d_model,
                        param_attr=fluid.ParamAttr(
                            initializer=fluid.initializer.Constant(1.)),
                        bias_attr=fluid.ParamAttr(
                            initializer=fluid.initializer.Constant(0.)))))
        elif cmd == "d":  # add dropout
            if dropout_rate:
                self.functors.append(
                    lambda x: layers.dropout(x, dropout_prob=dropout_rate))
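# A hedged usage sketch of the functor-based variants above, assuming the
# enclosing class is a dygraph Layer with a forward like the one sketched
# earlier, and using the Transformer's conventional command strings
# ("n" to pre-normalize, "da" for dropout then residual add). Variable names
# here are illustrative, not from the source:
pre_process = PrePostProcessLayer("n", d_model=512, dropout_rate=0.1)
post_process = PrePostProcessLayer("da", d_model=512, dropout_rate=0.1)
# inside an encoder sublayer:
#   y = sublayer(pre_process(x))   # LayerNorm before the sublayer
#   x = post_process(y, x)         # dropout on y, then add residual x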