Esempio n. 1
0
    def __init__(self, config, **kwargs):
        """Create the sub-layers and bookkeeping fields of the attention block.

        Args:
            config: Settings object. This constructor reads ``hidden_size``,
                ``num_heads``, ``attention_head_size``,
                ``attention_dropout_prob``, ``hidden_dropout_prob``,
                ``random_seed``, ``debug`` and, only when ``debug`` is
                truthy, ``debug_save_dir``. It is also handed to
                ``get_initializer``.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        super().__init__(**kwargs)
        model_width = config.hidden_size
        self.num_heads = config.num_heads
        self.attn_head_size = config.attention_head_size
        # Query, key and value projections all share this output width.
        projection_width = self.attn_head_size * self.num_heads
        kernel_init = get_initializer(config)

        def dense(units, layer_name):
            # Every projection in this block uses the same kernel initializer.
            return layers.Dense(units,
                                name=layer_name,
                                kernel_initializer=kernel_init)

        self.query_layer = dense(projection_width, "self/query")
        self.key_layer = dense(projection_width, "self/key")
        self.value_layer = dense(projection_width, "self/value")
        # NOTE(review): unlike the output dropout below, this dropout is
        # created without a seed -- confirm that is intentional.
        self.attn_dropout = layers.Dropout(config.attention_dropout_prob)
        self.attn_output_layer = dense(model_width, 'output/dense')
        self.attn_output_dropout = layers.Dropout(config.hidden_dropout_prob,
                                                  seed=config.random_seed)
        self.attn_norm_layer = layers.LayerNormalization(
            name="output/layer_norm", axis=-1, epsilon=1e-12)

        self.w_layer = layers.Dense(1, name="self/w")
        # Placeholder; nothing is stored here at construction time.
        self.attention = None
        self.random_seed = config.random_seed
        self.debug = config.debug
        # The save directory is only looked up when debugging is enabled.
        if config.debug:
            self.debug_save_dir = config.debug_save_dir
        else:
            self.debug_save_dir = None
Esempio n. 2
0
    def __init__(self, config, **kwargs):
        """Assemble the encoder, pooler and classification head.

        Args:
            config: Settings object. This constructor reads ``num_classes``,
                ``max_seq_length``, ``hidden_size``, ``hidden_dropout_prob``,
                ``use_replace_map`` and, only when the replace map is
                enabled, ``task``. ``config.get('num_choices', 0)`` is also
                queried, and the object is passed to ``Bert``,
                ``get_initializer`` and ``BertClassifierDataBuilder``.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        super().__init__(**kwargs)
        self.bert_encoder = Bert(config)

        kernel_init = get_initializer(config)
        self.num_classes = config.num_classes
        self.max_seq_length = config.max_seq_length
        self.pooler = layers.Dense(config.hidden_size,
                                   kernel_initializer=kernel_init,
                                   name='bert/pooler/dense',
                                   activation='tanh')

        self.cls_dropout_layer = layers.Dropout(config.hidden_dropout_prob)
        choice_count = config.get('num_choices', 0)
        self.num_choices = choice_count
        # A truthy num_choices switches the head to a single output unit.
        if choice_count:
            head_units = 1
        else:
            head_units = self.num_classes
        self.cls_layer = layers.Dense(head_units,
                                      kernel_initializer=kernel_init,
                                      name='classifier/dense')

        # Placeholders; all start out empty.
        self.pooled_output = None
        self.attentions = None
        self.encoded_output = None
        self.embeddings = None
        self.logits = None

        # Name substitutions; left empty when the replace map is disabled.
        self.replace_map = {
            'LayerNorm': 'layer_norm',
            'bert/': 'bert_' + config.task + '/bert/'
        } if config.use_replace_map else {}
        self.data_builder = BertClassifierDataBuilder(config)
Esempio n. 3
0
    def __init__(self, config, **kwargs):
        """Create the attention sub-layer and the dense/dropout/norm layers.

        Args:
            config: Settings object. This constructor reads ``hidden_size``,
                ``intermediate_size``, ``intermediate_act_fn``,
                ``hidden_dropout_prob``, ``random_seed``, ``debug`` and, only
                when ``debug`` is truthy, ``debug_save_dir``.
                ``config.get('svd_units', 0)`` decides whether the two extra
                ``dense0`` layers are created (``config.svd_units`` units
                each).
            **kwargs: Forwarded to the parent constructor, and again to
                ``MultiHeadAttention`` with ``name`` forced to
                ``'attention'``.
        """
        super().__init__(**kwargs)
        self.hidden_size = config.hidden_size
        ffn_units = config.intermediate_size
        ffn_activation = get_activation(config.intermediate_act_fn)

        # Reuse the caller's keyword arguments but override the layer name.
        attention_kwargs = dict(kwargs, name='attention')
        self.attention_layer = MultiHeadAttention(config, **attention_kwargs)

        # The optional 'dense0' layers exist only for a positive svd_units.
        use_svd = config.get('svd_units', 0) > 0

        self.intermediate_layer0 = (layers.Dense(config.svd_units,
                                                 name='dense0')
                                    if use_svd else None)
        self.intermediate_layer = layers.Dense(ffn_units,
                                               name='dense',
                                               activation=ffn_activation)
        self.output_layer0 = (layers.Dense(config.svd_units, name='dense0')
                              if use_svd else None)

        self.output_layer = layers.Dense(self.hidden_size, name='dense')
        self.output_dropout = layers.Dropout(config.hidden_dropout_prob,
                                             seed=config.random_seed)
        self.output_norm_layer = layers.LayerNormalization(name="layer_norm",
                                                           axis=-1,
                                                           epsilon=1e-12)

        # Placeholder; nothing is stored here at construction time.
        self.attention = None
        if config.debug:
            self.debug_save_dir = config.debug_save_dir
        else:
            self.debug_save_dir = None
Esempio n. 4
0
    def __init__(self,
                 size_in,
                 list_size_hidden,
                 size_out,
                 lambda_l2=0.,
                 ratio_dropout=None):
        """Build the parameters and the ordered layer stack of the network.

        For every pair of consecutive sizes an ``Affine<i>`` layer is
        created, followed by ``Dropout<i>`` when ``ratio_dropout`` is given,
        and by ``Relu<i>`` for every layer except the last. The loss layer
        is stored separately in ``self.lastLayer``.

        Args:
            size_in: Number of input units.
            list_size_hidden: Hidden layer sizes. Either a single integer
                (built-in or NumPy) or any iterable of sizes such as a list
                or tuple.
            size_out: Number of output units.
            lambda_l2: Stored as ``self.lambda_l2``; not used in this method.
            ratio_dropout: Value passed to ``clay.Dropout``. ``None`` means
                no dropout layers are added.
        """
        # Normalise the hidden sizes to a fresh list. Previously a tuple
        # raised TypeError on concatenation and a NumPy integer was silently
        # broadcast by ``list + np.int64``.
        if isinstance(list_size_hidden, (int, np.integer)):
            hidden_sizes = [list_size_hidden]
        else:
            hidden_sizes = list(list_size_hidden)
        self.size_units = [size_in] + hidden_sizes + [size_out]
        self.lambda_l2 = lambda_l2

        self.params = {}
        self.layers = OrderedDict()
        num_affine = len(self.size_units) - 1
        unit_pairs = zip(self.size_units, self.size_units[1:])
        for i, (fan_in, fan_out) in enumerate(unit_pairs, start=1):
            suffix = str(i)
            weight, bias = 'W' + suffix, 'b' + suffix

            # Weights are scaled by sqrt(2 / fan_in); biases start at zero.
            scale = np.sqrt(2.0 / fan_in)
            self.params[weight] = scale * np.random.randn(fan_in, fan_out)
            self.params[bias] = np.zeros(fan_out)

            self.layers['Affine' + suffix] = clay.Affine(self.params[weight],
                                                         self.params[bias])
            if ratio_dropout is not None:
                self.layers['Dropout' + suffix] = clay.Dropout(ratio_dropout)
            # The final affine layer gets no ReLU.
            if i < num_affine:
                self.layers['Relu' + suffix] = clay.Relu()

        self.lastLayer = clay.SoftmaxWithLoss()
Esempio n. 5
0
    def __init__(self, config, **kwargs):
        """Assemble the encoder, pooler, classification head and name map.

        Args:
            config: Settings object. This constructor reads ``num_classes``,
                ``hidden_size``, ``hidden_dropout_prob``,
                ``max_first_length``, ``max_seq_length``, ``task``,
                ``sep_layers``, ``num_hidden_layers`` and
                ``use_replace_map``. ``config.get('num_choices', 0)`` is
                also queried, and the object is passed to ``Ebert``,
                ``get_initializer`` and ``EbertClassifierDataBuilder``.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        super().__init__(**kwargs)
        self.ebert_encoder = Ebert(config)

        initializer = get_initializer(config)
        self.num_classes = config.num_classes
        self.pooler = layers.Dense(config.hidden_size,
                                   kernel_initializer=initializer,
                                   name='ebert/pooler/dense',
                                   activation='tanh')
        self.cls_dropout_layer = layers.Dropout(config.hidden_dropout_prob)
        # Read once; the original assigned this attribute twice with the
        # same lookup.
        self.num_choices = config.get('num_choices', 0)
        # A truthy num_choices switches the head to a single output unit.
        num_classes = 1 if self.num_choices else self.num_classes
        self.cls_layer = layers.Dense(num_classes,
                                      kernel_initializer=initializer,
                                      name='classifier/dense')

        self.max_first_length = config.max_first_length + 2
        self.max_second_length = config.max_seq_length - self.max_first_length

        # Placeholders; all start out empty.
        self.pooled_output = None
        self.encoded_output = None
        self.embeddings = None
        self.logits = None
        self.first_embeddings = None
        self.second_embeddings = None

        task = config.task
        replace_map = OrderedDict({
            'LayerNorm':
            'layer_norm',
            'bert/pooler':
            'ebert_{}/ebert/pooler'.format(task),
            'bert/embeddings':
            'ebert_{}/ebert/embeddings'.format(task)
        })
        # Insertion order matters: upper layers must be replaced first
        # (i.e., longest match), so they are added before the lower ones.
        layer_key = 'bert/encoder/layer_{}'
        layer_val = 'ebert_{}/ebert/{}_encoder/layer_{}'
        for layer_idx in range(config.sep_layers, config.num_hidden_layers):
            k = layer_key.format(layer_idx)
            replace_map[k] = layer_val.format(task, 'upper', layer_idx)
        for layer_idx in range(config.sep_layers):
            k = layer_key.format(layer_idx)
            replace_map[k] = layer_val.format(task, 'lower', layer_idx)
        # The map is always built but only exposed when enabled.
        if config.use_replace_map:
            self.replace_map = replace_map
        else:
            self.replace_map = {}
        self.data_builder = EbertClassifierDataBuilder(config)
Esempio n. 6
0
    def __init__(self,
                 input_size,
                 hidden_size_list,
                 output_size,
                 activation='relu',
                 weight_init_std='relu',
                 weight_decay_lambda=0,
                 use_dropout=False,
                 dropout_ratio=0.5,
                 use_batchnorm=False):
        """Store the configuration, initialise weights and build the layers.

        Each hidden layer contributes ``Affine<i>``, optionally
        ``BatchNorm<i>``, ``Activation_function<i>`` and optionally
        ``Dropout<i>``. A final ``Affine`` layer follows, and the loss layer
        is kept separately in ``self.last_layer``.

        Args:
            input_size: Stored as ``self.input_size``.
            hidden_size_list: Sizes of the hidden layers; its length sets
                the number of hidden layers.
            output_size: Stored as ``self.output_size``.
            activation: ``'sigmoid'`` or ``'relu'``; selects the activation
                layer class.
            weight_init_std: Passed to ``self.__init_weight``.
            weight_decay_lambda: Stored as ``self.weight_decay_lambda``.
            use_dropout: When truthy, add a dropout layer after each
                activation.
            dropout_ratio: Value passed to ``layers.Dropout``.
            use_batchnorm: When truthy, add a batch-normalisation layer
                after each hidden affine layer.
        """
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)
        self.use_dropout = use_dropout
        self.weight_decay_lambda = weight_decay_lambda
        self.use_batchnorm = use_batchnorm
        self.params = {}

        # Weight initialisation
        self.__init_weight(weight_init_std)

        # Layer creation
        activation_classes = {'sigmoid': layers.Sigmoid, 'relu': layers.Relu}
        self.layers = OrderedDict()
        for idx in range(1, self.hidden_layer_num + 1):
            tag = str(idx)
            self.layers['Affine' + tag] = layers.Affine(
                self.params['W' + tag], self.params['b' + tag])

            if self.use_batchnorm:
                # gamma starts at ones and beta at zeros, one entry per
                # unit of this hidden layer.
                width = hidden_size_list[idx - 1]
                self.params['gamma' + tag] = np.ones(width)
                self.params['beta' + tag] = np.zeros(width)
                self.layers['BatchNorm' + tag] = layers.BatchNormalization(
                    self.params['gamma' + tag], self.params['beta' + tag])

            self.layers['Activation_function' + tag] = \
                activation_classes[activation]()

            if self.use_dropout:
                self.layers['Dropout' + tag] = layers.Dropout(dropout_ratio)

        # Final affine layer: no batch norm, activation or dropout after it.
        tag = str(self.hidden_layer_num + 1)
        self.layers['Affine' + tag] = layers.Affine(self.params['W' + tag],
                                                    self.params['b' + tag])

        self.last_layer = layers.SoftmaxWithLoss()
Esempio n. 7
0
 def __init__(self, config, **kwargs):
     """Record the embedding settings and create the norm/dropout layers.

     Args:
         config: Settings object. This constructor reads ``vocab_size``,
             ``hidden_size``, ``use_tpu``, ``type_vocab_size``,
             ``max_position_embeddings`` and ``hidden_dropout_prob``.
         **kwargs: Forwarded unchanged to the parent constructor.
     """
     super().__init__(**kwargs)

     # Plain settings copied from the config.
     self.vocab_size = config.vocab_size
     # The embedding width is taken from the config's hidden size.
     self.embedding_size = config.hidden_size
     self.use_tpu = config.use_tpu
     self.type_vocab_size = config.type_vocab_size
     self.max_position_embeddings = config.max_position_embeddings
     dropout_prob = config.hidden_dropout_prob
     self.hidden_dropout_prob = dropout_prob

     # Lookup tables are not created here; they start out empty.
     self.embedding_table = None
     self.token_type_table = None
     self.position_table = None

     self.layer_norm = layers.LayerNormalization(
         name="layer_norm", axis=-1, epsilon=1e-12)
     self.dropout = layers.Dropout(dropout_prob)
Esempio n. 8
0
    def __init__(self,
                 input_size,
                 hidden_size_list,
                 output_size,
                 activation='relu',
                 weight_init_std='relu',
                 weight_decay_lambda=0.0,
                 use_dropout=False,
                 dropout_ratio=0.5,
                 use_batchnorm=False):
        """Store the configuration, initialise weights and build the layers.

        Each hidden layer contributes ``Affine<i>``, optionally
        ``BatchNorm<i>``, ``Activation<i>`` and optionally ``Dropout<i>``.
        A final ``Affine`` layer follows. The loss layer is ``layers.MSE``
        and is kept separately in ``self.last_layer``.

        Args:
            input_size: Stored as ``self.input_size``.
            hidden_size_list: Sizes of the hidden layers; its length sets
                the number of hidden layers.
            output_size: Stored as ``self.output_size``.
            activation: ``'sigmoid'`` or ``'relu'``; selects the activation
                layer class.
            weight_init_std: Passed to ``self.__init_weight``.
            weight_decay_lambda: Stored as ``self.weight_decay_lambda``.
            use_dropout: When truthy, add a dropout layer after each
                activation.
            dropout_ratio: Value passed to ``layers.Dropout``.
            use_batchnorm: When truthy, add a batch-normalisation layer
                after each hidden affine layer.
        """
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)
        self.weight_decay_lambda = weight_decay_lambda
        self.use_dropout = use_dropout
        self.use_batchnorm = use_batchnorm
        self.params = {}

        # Weight initialisation
        self.__init_weight(weight_init_std)

        # Layer creation
        activation_classes = {'sigmoid': layers.Sigmoid, 'relu': layers.Relu}
        self.layers = OrderedDict()
        for idx in range(1, self.hidden_layer_num + 1):
            tag = str(idx)
            self.layers['Affine' + tag] = layers.Affine(
                self.params['W' + tag], self.params['b' + tag])
            if self.use_batchnorm:
                # NOTE(review): gamma/beta are read here but not created in
                # this method -- presumably __init_weight fills them in;
                # confirm.
                self.layers['BatchNorm' + tag] = layers.BatchNormalization(
                    self.params['gamma' + tag], self.params['beta' + tag])
            self.layers['Activation' + tag] = activation_classes[activation]()
            if self.use_dropout:
                self.layers['Dropout' + tag] = layers.Dropout(dropout_ratio)

        # The last Affine layer needs no activation or batch norm.
        tag = str(self.hidden_layer_num + 1)
        self.layers['Affine' + tag] = layers.Affine(self.params['W' + tag],
                                                    self.params['b' + tag])

        # Mean-squared-error loss (a softmax cross-entropy loss was used
        # here previously).
        self.last_layer = layers.MSE()
        # Dict used to save activation layer outputs.
        self.activation_dict = OrderedDict()
    def __init__(self, input_size, hidden_size, output_size):
        """Create the parameters and layers of a two-affine-layer network.

        The stack is ``Affine1 -> Relu1 -> Dropout1 -> Affine2``; the loss
        layer is kept separately in ``self.lossLayer``.

        Args:
            input_size: Number of rows of ``W1``.
            hidden_size: Number of columns of ``W1`` / rows of ``W2``.
            output_size: Number of columns of ``W2``.
        """
        # Parameters: small random weights, zero biases.
        init_scale = 0.01
        w1 = init_scale * np.random.randn(input_size, hidden_size)
        b1 = np.zeros(hidden_size)
        w2 = init_scale * np.random.randn(hidden_size, output_size)
        b2 = np.zeros(output_size)
        self.params = {'W1': w1, 'b1': b1, 'W2': w2, 'b2': b2}

        # Layers, in forward order.
        self.layers = OrderedDict([
            ('Affine1', layers.Affine(self.params['W1'], self.params['b1'])),
            ('Relu1', layers.Relu()),
            ('Dropout1', layers.Dropout(drop_ratio=0.3)),
            ('Affine2', layers.Affine(self.params['W2'], self.params['b2'])),
        ])

        # NOTE(review): class_num is fixed at 10 regardless of output_size
        # -- confirm whether the two are meant to match.
        self.lossLayer = layers.SoftmaxCrossEntropy(class_num=10)
Esempio n. 10
0
 def __init__(self, units, dropout_rate=0, **kwargs):
     """Initialise the parent with ``units`` and attach a dropout layer.

     Args:
         units: Forwarded positionally to the parent constructor.
         dropout_rate: Value passed to ``layers.Dropout``.
         **kwargs: Forwarded unchanged to the parent constructor.
     """
     super().__init__(units, **kwargs)
     dropout = layers.Dropout(dropout_rate)
     self.dropout_layer = dropout
    def __init__(self,
                 input_size,
                 hidden_size_list,
                 output_size,
                 activation='relu',
                 weight_init_std='relu',
                 weight_decay_lambda=0,
                 use_dropout=False,
                 dropout_ration=0.5,
                 use_batchnorm=False):
        """Store the configuration, initialise weights and build the layers.

        Each hidden layer contributes ``Affine<i>``, optionally
        ``BatchNorm<i>``, ``Activation_function<i>`` and optionally
        ``Dropout<i>``. A final ``Affine`` layer follows, and the loss layer
        is kept separately in ``self.last_layer``.

        Args:
            input_size: Stored as ``self.input_size``.
            hidden_size_list: Sizes of the hidden layers; its length sets
                the number of hidden layers.
            output_size: Stored as ``self.output_size``.
            activation: ``'sigmoid'`` or ``'relu'``; selects the activation
                layer class.
            weight_init_std: Passed to ``self.__init_weight``.
            weight_decay_lambda: Stored as ``self.weight_decay_lambda``.
            use_dropout: When truthy, add a dropout layer after each
                activation.
            dropout_ration: Value passed to ``layers.Dropout``.
            use_batchnorm: When truthy, add a batch-normalisation layer
                after each hidden affine layer.
        """
        self.input_size = input_size
        self.hidden_size_list = hidden_size_list
        self.hidden_layer_num = len(hidden_size_list)
        self.output_size = output_size

        self.weight_decay_lambda = weight_decay_lambda
        self.use_dropout = use_dropout
        self.use_batchnorm = use_batchnorm
        self.params = {}

        # Weight initialisation
        self.__init_weight(weight_init_std)

        # Layer creation
        activation_classes = {'sigmoid': layers.Sigmoid, 'relu': layers.Relu}
        self.layers = OrderedDict()

        # Hidden layers: one group per entry in hidden_size_list.
        for idx in range(1, self.hidden_layer_num + 1):
            tag = str(idx)

            # (1) Affine layer
            self.layers['Affine' + tag] = layers.Affine(
                self.params['W' + tag], self.params['b' + tag])

            # (2) Batch-normalisation layer
            if self.use_batchnorm:
                # Start from the identity transform: scale (gamma) of 1
                # and shift (beta) of 0.
                width = hidden_size_list[idx - 1]
                self.params['gamma' + tag] = np.ones(width)
                self.params['beta' + tag] = np.zeros(width)
                self.layers['BatchNorm' + tag] = layers.BatchNormalization(
                    self.params['gamma' + tag], self.params['beta' + tag])

            # (3) Activation function
            self.layers['Activation_function' + tag] = \
                activation_classes[activation]()

            # (4) Dropout layer
            if self.use_dropout:
                self.layers['Dropout' + tag] = layers.Dropout(dropout_ration)

        # Output-side Affine layer
        tag = str(self.hidden_layer_num + 1)
        self.layers['Affine' + tag] = layers.Affine(self.params['W' + tag],
                                                    self.params['b' + tag])

        # Output layer: softmax combined with the loss.
        self.last_layer = layers.SoftmaxWithLoss()
    def __init__(self,
                 input_dim=(1, 28, 28),
                 conv_param_1={
                     'filter_num': 16,
                     'filter_size': 3,
                     'pad': 1,
                     'stride': 1
                 },
                 conv_param_2={
                     'filter_num': 16,
                     'filter_size': 3,
                     'pad': 1,
                     'stride': 1
                 },
                 conv_param_3={
                     'filter_num': 32,
                     'filter_size': 3,
                     'pad': 1,
                     'stride': 1
                 },
                 conv_param_4={
                     'filter_num': 32,
                     'filter_size': 3,
                     'pad': 2,
                     'stride': 1
                 },
                 conv_param_5={
                     'filter_num': 64,
                     'filter_size': 3,
                     'pad': 1,
                     'stride': 1
                 },
                 conv_param_6={
                     'filter_num': 64,
                     'filter_size': 3,
                     'pad': 1,
                     'stride': 1
                 },
                 hidden_size=50,
                 output_size=10):
        """Create the parameters and the layer list of the network.

        Layer order: (conv, relu, conv, relu, pool) three times, then
        affine, relu, dropout, affine, dropout. The loss layer is kept
        separately in ``self.last_layer``.

        Weight scales and the first affine layer's input size are derived
        from ``input_dim`` and the ``conv_param_*`` arguments. With the
        default arguments they equal the previously hard-coded values
        (fan-ins 9, 144, 144, 288, 288, 576 and a flattened size of
        64 * 4 * 4).

        Args:
            input_dim: ``(channels, height, width)`` of one input sample.
            conv_param_1 .. conv_param_6: One dict per convolution layer
                with the keys ``'filter_num'``, ``'filter_size'``, ``'pad'``
                and ``'stride'``. The default dicts are shared between
                calls; they are only read, never modified.
            hidden_size: Number of units of the first affine layer.
            output_size: Number of units of the last affine layer.
        """
        conv_params = (conv_param_1, conv_param_2, conv_param_3,
                       conv_param_4, conv_param_5, conv_param_6)
        pool_size = 2
        pool_stride = 2

        def conv_out(size, conv_param):
            # Standard convolution output-size formula.
            # NOTE(review): assumes layers.Convolution follows it -- confirm.
            return ((size + 2 * conv_param['pad'] - conv_param['filter_size'])
                    // conv_param['stride'] + 1)

        def pool_out(size):
            return (size - pool_size) // pool_stride + 1

        # Weight initialisation ===========
        # Every weight is scaled by sqrt(2 / fan_in) (He initialisation),
        # where fan_in is the number of connections each neuron has to the
        # previous layer.
        self.params = {}
        pre_channel_num = input_dim[0]
        height, width = input_dim[1], input_dim[2]
        for idx, conv_param in enumerate(conv_params, start=1):
            filter_num = conv_param['filter_num']
            filter_size = conv_param['filter_size']
            fan_in = pre_channel_num * filter_size * filter_size
            self.params['W' + str(idx)] = (
                np.sqrt(2.0 / fan_in) * np.random.randn(
                    filter_num, pre_channel_num, filter_size, filter_size))
            self.params['b' + str(idx)] = np.zeros(filter_num)

            # Track the feature-map shape so the affine layer's input size
            # matches the configured convolutions.
            pre_channel_num = filter_num
            height = conv_out(height, conv_param)
            width = conv_out(width, conv_param)
            if idx % 2 == 0:
                # A pooling layer follows every second convolution.
                height = pool_out(height)
                width = pool_out(width)

        flat_size = pre_channel_num * height * width
        self.params['W7'] = np.sqrt(2.0 / flat_size) * np.random.randn(
            flat_size, hidden_size)
        self.params['b7'] = np.zeros(hidden_size)
        self.params['W8'] = np.sqrt(2.0 / hidden_size) * np.random.randn(
            hidden_size, output_size)
        self.params['b8'] = np.zeros(output_size)

        # Layer creation ===========
        self.layers = []
        for idx, conv_param in enumerate(conv_params, start=1):
            self.layers.append(
                layers.Convolution(self.params['W' + str(idx)],
                                   self.params['b' + str(idx)],
                                   conv_param['stride'], conv_param['pad']))
            self.layers.append(layers.Relu())
            if idx % 2 == 0:
                self.layers.append(
                    layers.Pooling(pool_h=pool_size,
                                   pool_w=pool_size,
                                   stride=pool_stride))
        self.layers.append(layers.Affine(self.params['W7'], self.params['b7']))
        self.layers.append(layers.Relu())
        self.layers.append(layers.Dropout(0.5))
        self.layers.append(layers.Affine(self.params['W8'], self.params['b8']))
        self.layers.append(layers.Dropout(0.5))

        self.last_layer = layers.SoftmaxWithLoss()