예제 #1
0
    def __init__(self, process_cmd, d_model, dropout_rate, name):
        """Assemble the processing steps selected by ``process_cmd``.

        Every command that produces a step appends one callable to
        ``self.functors`` and its letter to ``self.exec_order``. Commands
        other than ``"a"``, ``"n"`` and ``"d"`` are ignored.

        Args:
            process_cmd: Iterable of one-character commands. ``"a"`` adds a
                residual connection, ``"n"`` adds layer normalization and
                ``"d"`` adds dropout.
            d_model: Passed to ``LayerNorm`` as ``normalized_shape``.
            dropout_rate: Dropout probability. When it is falsy the ``"d"``
                command adds nothing.
            name: Prefix for the layer-norm scale and bias parameter names.
        """
        super(PrePostProcessLayer, self).__init__()
        self.process_cmd = process_cmd
        self.functors = []
        self.exec_order = ""

        def residual(x, y):
            # Without a residual input the value passes through unchanged.
            if y is None:
                return x
            return x + y

        def dropout(x):
            # NOTE(review): is_test=False is hard-coded, so this call does
            # not depend on any train/eval switch -- confirm this is wanted.
            return fluid.layers.dropout(
                x, dropout_prob=dropout_rate, is_test=False)

        for cmd in self.process_cmd:
            if cmd == "a":
                step, tag = residual, "a"
            elif cmd == "n":
                # The sublayer name is indexed by the number of direct
                # sublayers registered so far.
                sublayer_name = "layer_norm_%d" % len(
                    self.sublayers(include_sublayers=False))
                scale_attr = fluid.ParamAttr(
                    name=name + "_layer_norm_scale",
                    initializer=fluid.initializer.Constant(1.))
                shift_attr = fluid.ParamAttr(
                    name=name + "_layer_norm_bias",
                    initializer=fluid.initializer.Constant(0.))
                norm = LayerNorm(
                    normalized_shape=d_model,
                    param_attr=scale_attr,
                    bias_attr=shift_attr)
                step, tag = self.add_sublayer(sublayer_name, norm), "n"
            elif cmd == "d" and dropout_rate:
                step, tag = dropout, "d"
            else:
                # Unknown command, or dropout requested with a falsy rate.
                continue
            self.functors.append(step)
            self.exec_order += tag
예제 #2
0
 def __init__(self, d_model, process_cmd, shape_len=None):
     """Create the layer-norm sublayer when ``process_cmd`` asks for it.

     Args:
         d_model: Passed to ``LayerNorm`` as ``normalized_shape``.
         process_cmd: Iterable of one-character commands; only ``"n"``
             has an effect in this constructor.
         shape_len: Accepted but not used by this constructor.
     """
     super(PrePostProcessLayer, self).__init__()
     for cmd in process_cmd:
         if cmd != "n":
             continue
         # Scale starts at 1 and bias at 0.
         scale_attr = fluid.ParamAttr(
             initializer=fluid.initializer.Constant(1.))
         shift_attr = fluid.ParamAttr(
             initializer=fluid.initializer.Constant(0.))
         self._layer_norm = LayerNorm(
             normalized_shape=d_model,
             param_attr=scale_attr,
             bias_attr=shift_attr)
예제 #3
0
 def __init__(self, name_scope, process_cmd, shape_len=None):
     """Create the layer-norm sublayer when ``process_cmd`` asks for it.

     Args:
         name_scope: Forwarded to the base-class constructor.
         process_cmd: Iterable of one-character commands; only ``"n"``
             has an effect in this constructor.
         shape_len: Used to derive ``begin_norm_axis`` as
             ``shape_len - 1``.
     """
     super(PrePostProcessLayer, self).__init__(name_scope)
     for cmd in process_cmd:
         if cmd != "n":
             continue
         scope = self.full_name()
         # NOTE(review): shape_len defaults to None, and None - 1 raises
         # TypeError -- confirm callers always pass it alongside "n".
         norm_axis = shape_len - 1
         # Scale starts at 1 and bias at 0.
         scale_attr = fluid.ParamAttr(
             initializer=fluid.initializer.Constant(1.))
         shift_attr = fluid.ParamAttr(
             initializer=fluid.initializer.Constant(0.))
         self._layer_norm = LayerNorm(
             name_scope=scope,
             begin_norm_axis=norm_axis,
             param_attr=scale_attr,
             bias_attr=shift_attr)
예제 #4
0
 def __init__(self, process_cmd, d_model, dropout_rate):
     """Assemble the processing steps selected by ``process_cmd``.

     Every command that produces a step appends one callable to
     ``self.functors``. Commands other than ``"a"``, ``"n"`` and ``"d"``
     are ignored.

     Args:
         process_cmd: Iterable of one-character commands. ``"a"`` adds a
             residual connection, ``"n"`` adds layer normalization and
             ``"d"`` adds dropout.
         d_model: Passed to ``LayerNorm`` as ``normalized_shape``.
         dropout_rate: Dropout probability. When it is falsy the ``"d"``
             command adds nothing.
     """
     super(PrePostProcessLayer, self).__init__()
     self.process_cmd = process_cmd
     self.functors = []

     def residual(x, y):
         # Without a residual input the value passes through unchanged.
         if y is None:
             return x
         return x + y

     def dropout(x):
         return layers.dropout(x, dropout_prob=dropout_rate)

     for cmd in self.process_cmd:
         if cmd == "a":
             step = residual
         elif cmd == "n":
             # The sublayer name is indexed by the number of children
             # registered so far.
             sublayer_name = "layer_norm_%d" % len(list(self.children()))
             scale_attr = fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(1.))
             shift_attr = fluid.ParamAttr(
                 initializer=fluid.initializer.Constant(0.))
             norm = LayerNorm(
                 normalized_shape=d_model,
                 param_attr=scale_attr,
                 bias_attr=shift_attr)
             step = self.add_sublayer(sublayer_name, norm)
         elif cmd == "d" and dropout_rate:
             step = dropout
         else:
             # Unknown command, or dropout requested with a falsy rate.
             continue
         self.functors.append(step)