Example #1
 def test_reshape(self):
     a = np.random.random((2, 2, 3, 4))
     i1 = ZLayer.Input(shape=(2, 3, 4))
     s = ZLayer.Reshape((-1, 2, 12))(i1)
     m = ZModel(i1, s)
     # predict should not raise an exception
     y = m.predict(a, distributed=False)
Example #2
 def test_load(self):
     input = ZLayer.Input(shape=(5, ))
     output = ZLayer.Dense(10)(input)
     zmodel = ZModel(input, output, name="graph1")
     tmp_path = create_tmp_path()
     zmodel.saveModel(tmp_path, None, True)
     model_reloaded = Net.load(tmp_path)
     input_data = np.random.random([3, 5])
     self.compare_output_and_grad_input(zmodel, model_reloaded, input_data)
Example #3
 def test_save_load_Model(self):
     input = ZLayer.Input(shape=(5, ))
     output = ZLayer.Dense(10)(input)
     zmodel = ZModel(input, output, name="graph1")
     tmp_path = create_tmp_path()
     zmodel.saveModel(tmp_path, None, True)
     model_reloaded = Net.load(tmp_path)
     input_data = np.random.random([10, 5])
     y = np.random.random([10, 10])
     model_reloaded.compile(optimizer="adam", loss="mse")
     model_reloaded.fit(x=input_data, y=y, batch_size=8, nb_epoch=2)
Example #4
    def __init__(self,
                 n_block,
                 n_head,
                 intermediate_size,
                 hidden_drop,
                 attn_drop,
                 initializer_range,
                 output_all_block,
                 embedding_layer,
                 input_shape,
                 bigdl_type="float"):
        self.hidden_drop = hidden_drop
        self.attn_drop = attn_drop
        self.n_head = n_head
        self.intermediate_size = intermediate_size
        self.output_all_block = output_all_block
        self.bigdl_type = bigdl_type
        self.seq_len = input_shape[0][0]
        self.initializer_range = initializer_range
        self.bidirectional = True
        self.n_block = n_block

        word_input = Input(shape=input_shape[0])
        token_type_input = Input(shape=input_shape[1])
        position_input = Input(shape=input_shape[2])
        attention_mask = Input(shape=input_shape[3])

        e = embedding_layer([word_input, token_type_input, position_input])
        self.hidden_size = e.get_output_shape()[-1]
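        # turn the 0/1 padding mask into an additive mask: positions to attend get 0,
        # padded positions get -10000, so they vanish after the attention softmax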
        extended_attention_mask = (-attention_mask + 1.0) * -10000.0

        next_input = e
        model_output = [None] * n_block
        model_output[0] = self.block(next_input, self.hidden_size,
                                     extended_attention_mask)

        for index in range(n_block - 1):
            output = self.block(model_output[index], self.hidden_size,
                                extended_attention_mask)
            model_output[index + 1] = output

        pooler_output = self.pooler(model_output[-1], self.hidden_size)

        if output_all_block:
            model_output.append(pooler_output)
            model = Model(
                [word_input, token_type_input, position_input, attention_mask],
                model_output)
        else:
            model = Model(
                [word_input, token_type_input, position_input, attention_mask],
                [model_output[-1], pooler_output])
        self.value = model.value
Example #5
    def __init__(self,
                 n_block,
                 hidden_drop,
                 attn_drop,
                 n_head,
                 initializer_range,
                 bidirectional,
                 output_all_block,
                 embedding_layer,
                 input_shape,
                 intermediate_size=0,
                 bigdl_type="float"):
        self.hidden_drop = hidden_drop
        self.attn_drop = attn_drop
        self.n_head = n_head
        self.initializer_range = initializer_range
        self.output_all_block = output_all_block
        self.bidirectional = bidirectional
        self.intermediate_size = intermediate_size
        self.seq_len = input_shape[0][0]
        self.bigdl_type = bigdl_type
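        # for a unidirectional (e.g. GPT-style) model, precompute a lower-triangular
        # causal mask so each position only attends to itself and earlier positions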
        if not bidirectional:
            mask_value = np.tril(
                np.ones((self.seq_len, self.seq_len), dtype=bigdl_type))
            self.mask_value = auto.Constant(
                data=mask_value.reshape((1, 1, self.seq_len, self.seq_len)))

        (extended_attention_mask, embedding_inputs,
         inputs) = self.build_input(input_shape)
        embedding = embedding_layer(embedding_inputs)
        hidden_size = embedding.get_output_shape()[-1]

        next_input = embedding

        output = [None] * n_block
        output[0] = self.block(next_input, hidden_size,
                               extended_attention_mask)

        for index in range(n_block - 1):
            o = self.block(output[index], hidden_size, extended_attention_mask)
            output[index + 1] = o

        pooler_output = self.pooler(output[-1], hidden_size)
        if output_all_block:
            output.append(pooler_output)
            model = Model(inputs, output)
        else:
            model = Model(inputs, [output[-1], pooler_output])
        self.value = model.value
Example #6
 def to_model(self):
     from bigdl.dllib.keras.models import Model
     return Model.from_jvalue(
         callZooFunc(self.bigdl_type, "kerasNetToModel", self.value))
Example #7
    def init(cls,
             vocab=40990,
             hidden_size=768,
             n_block=12,
             n_head=12,
             seq_len=512,
             intermediate_size=3072,
             hidden_drop=0.1,
             attn_drop=0.1,
             initializer_range=0.02,
             output_all_block=True,
             bigdl_type="float"):
        """
        vocab: vocabulary size of training data, default is 40990
        hidden_size: size of the encoder layers, default is 768
        n_block: block number, default is 12
        n_head: head number, default is 12
        seq_len: max sequence length of training data, default is 512
        intermediate_size: size of the "intermediate" (i.e., feed-forward) layer, default is 3072
        hidden_drop: drop probability of fully connected layers, default is 0.1
        attn_drop: drop probability of attention, default is 0.1
        initializer_range: weight initialization range, default is 0.02
        output_all_block: whether output all blocks' output, default is True
        """
        word_input = Input(shape=(seq_len, ))
        token_type_input = Input(shape=(seq_len, ))
        position_input = Input(shape=(seq_len, ))
        word_embedding = Embedding(vocab,
                                   hidden_size,
                                   input_length=seq_len,
                                   weights=np.random.normal(
                                       0.0, initializer_range,
                                       (vocab, hidden_size)))(word_input)
        position_embedding = Embedding(
            seq_len,
            hidden_size,
            input_length=seq_len,
            weights=np.random.normal(0.0, initializer_range,
                                     (seq_len, hidden_size)))(position_input)
        token_type_embedding = Embedding(
            2,
            hidden_size,
            input_length=seq_len,
            weights=np.random.normal(0.0, initializer_range,
                                     (2, hidden_size)))(token_type_input)
        embedding = word_embedding + position_embedding + token_type_embedding

        w = auto.Parameter(shape=(1, hidden_size),
                           init_weight=np.ones((1, hidden_size),
                                               dtype=bigdl_type))
        b = auto.Parameter(shape=(1, hidden_size),
                           init_weight=np.zeros((1, hidden_size),
                                                dtype=bigdl_type))
        after_norm = layer_norm(embedding, w, b, 1e-12)
        h = Dropout(hidden_drop)(after_norm)

        embedding_layer = Model([word_input, token_type_input, position_input],
                                h)
        shape = ((seq_len, ), (seq_len, ), (seq_len, ), (1, 1, seq_len))

        return BERT(n_block,
                    n_head,
                    intermediate_size,
                    hidden_drop,
                    attn_drop,
                    initializer_range,
                    output_all_block,
                    embedding_layer,
                    input_shape=shape)
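
The docstring above lists the factory's hyperparameters. As a minimal, hypothetical usage sketch (the hyperparameter values, shapes, and the assumption that the layer is applied to Input nodes the same way TransformerLayer is in Example #9 are mine, not taken from the snippets), the returned BERT layer could be wired into a graph like this:

# hypothetical sketch: build a small BERT and wire it into a Keras-style graph
bert = BERT.init(vocab=30000,
                 hidden_size=256,
                 n_block=4,
                 n_head=4,
                 seq_len=128,
                 intermediate_size=1024,
                 output_all_block=False)
word_input = Input(shape=(128, ))
token_type_input = Input(shape=(128, ))
position_input = Input(shape=(128, ))
attention_mask = Input(shape=(1, 1, 128))
# assumed: the call yields the last block's output followed by the pooler output
outputs = bert([word_input, token_type_input, position_input, attention_mask])
model = Model([word_input, token_type_input, position_input, attention_mask], outputs)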
Example #8
 def test_deprecated_save(self):
     with pytest.raises(Exception) as e_info:
         input = ZLayer.Input(shape=(5, ))
         output = ZLayer.Dense(10)(input)
         zmodel = ZModel(input, output, name="graph1")
         zmodel.save(create_tmp_path())
Example #9
token_shape = (max_len, )
position_shape = (max_len, )
token_input = Input(shape=token_shape)
position_input = Input(shape=position_shape)
O_seq = TransformerLayer.init(vocab=max_features,
                              hidden_size=128,
                              n_head=8,
                              seq_len=max_len)([token_input, position_input])
# Select the first output of the Transformer. The second is the pooled output.
O_seq = SelectTable(0)(O_seq)
O_seq = GlobalAveragePooling1D()(O_seq)
O_seq = Dropout(0.2)(O_seq)
outputs = Dense(2, activation='softmax')(O_seq)

model = Model([token_input, position_input], outputs)
model.summary()
batch_size = 128
print('Train...')

est = Estimator.from_bigdl(model=model,
                           loss=SparseCategoricalCrossEntropy(),
                           optimizer=Adam(),
                           metrics=[Accuracy()])
est.fit(data=train_dataset, batch_size=batch_size, epochs=1)
print("Train finished.")

print('Evaluating...')
result = est.evaluate(val_dataset)
print(result)
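
The script assumes train_dataset and val_dataset were prepared earlier. Since the graph takes both a token input and a position input, every sample has to carry the two arrays; a hypothetical numpy sketch of producing such paired inputs (the function name and the right-padding scheme are assumptions, not part of the original script) could look like:

import numpy as np

def to_model_inputs(sequences, max_len):
    # pad/truncate each token id sequence to max_len (zero-padded on the right)
    tokens = np.zeros((len(sequences), max_len), dtype="int32")
    for i, seq in enumerate(sequences):
        trimmed = seq[:max_len]
        tokens[i, :len(trimmed)] = trimmed
    # pair every sequence with its position indices 0 .. max_len - 1
    positions = np.tile(np.arange(max_len, dtype="int32"), (len(sequences), 1))
    return tokens, positions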