def test_reshape(self):
    a = np.random.random((2, 2, 3, 4))
    i1 = ZLayer.Input(shape=(2, 3, 4))
    s = ZLayer.Reshape((-1, 2, 12))(i1)
    m = ZModel(i1, s)
    # predict should not generate exception
    y = m.predict(a, distributed=False)
def test_load(self):
    input = ZLayer.Input(shape=(5, ))
    output = ZLayer.Dense(10)(input)
    zmodel = ZModel(input, output, name="graph1")
    tmp_path = create_tmp_path()
    zmodel.saveModel(tmp_path, None, True)
    model_reloaded = Net.load(tmp_path)
    input_data = np.random.random([3, 5])
    self.compare_output_and_grad_input(zmodel, model_reloaded, input_data)
def test_save_load_Model(self):
    input = ZLayer.Input(shape=(5, ))
    output = ZLayer.Dense(10)(input)
    zmodel = ZModel(input, output, name="graph1")
    tmp_path = create_tmp_path()
    zmodel.saveModel(tmp_path, None, True)
    model_reloaded = Net.load(tmp_path)
    input_data = np.random.random([10, 5])
    y = np.random.random([10, 10])
    model_reloaded.compile(optimizer="adam", loss="mse")
    model_reloaded.fit(x=input_data, y=y, batch_size=8, nb_epoch=2)
def __init__(self, n_block, n_head, intermediate_size, hidden_drop,
             attn_drop, initializer_range, output_all_block,
             embedding_layer, input_shape, bigdl_type="float"):
    self.hidden_drop = hidden_drop
    self.attn_drop = attn_drop
    self.n_head = n_head
    self.intermediate_size = intermediate_size
    self.output_all_block = output_all_block
    self.bigdl_type = bigdl_type
    self.seq_len = input_shape[0][0]
    self.initializer_range = initializer_range
    self.bidirectional = True
    self.n_block = n_block

    word_input = Input(shape=input_shape[0])
    token_type_input = Input(shape=input_shape[1])
    position_input = Input(shape=input_shape[2])
    attention_mask = Input(shape=input_shape[3])

    e = embedding_layer([word_input, token_type_input, position_input])
    self.hidden_size = e.get_output_shape()[-1]
    extended_attention_mask = (-attention_mask + 1.0) * -10000.0

    next_input = e
    model_output = [None] * n_block
    model_output[0] = self.block(next_input, self.hidden_size,
                                 extended_attention_mask)
    for index in range(n_block - 1):
        output = self.block(model_output[index], self.hidden_size,
                            extended_attention_mask)
        model_output[index + 1] = output

    pooler_output = self.pooler(model_output[-1], self.hidden_size)

    if output_all_block:
        model_output.append(pooler_output)
        model = Model([word_input, token_type_input, position_input,
                       attention_mask], model_output)
    else:
        model = Model([word_input, token_type_input, position_input,
                       attention_mask], [model_output[-1], pooler_output])
    self.value = model.value
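# Sketch (not part of the source): how the extended attention mask above
# behaves numerically. An input mask value of 1 means "attend" and 0 means
# "ignore"; after the transform, attended positions add 0 to the attention
# scores while masked positions add -10000, so their softmax weight is ~0.
import numpy as np

attention_mask = np.array([1.0, 1.0, 0.0])
extended_attention_mask = (-attention_mask + 1.0) * -10000.0
# extended_attention_mask is now [-0., -0., -10000.]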
def __init__(self, n_block, hidden_drop, attn_drop, n_head,
             initializer_range, bidirectional, output_all_block,
             embedding_layer, input_shape, intermediate_size=0,
             bigdl_type="float"):
    self.hidden_drop = hidden_drop
    self.attn_drop = attn_drop
    self.n_head = n_head
    self.initializer_range = initializer_range
    self.output_all_block = output_all_block
    self.bidirectional = bidirectional
    self.intermediate_size = intermediate_size
    self.seq_len = input_shape[0][0]
    self.bigdl_type = bigdl_type

    if not bidirectional:
        mask_value = np.tril(
            np.ones((self.seq_len, self.seq_len), dtype=bigdl_type))
        self.mask_value = auto.Constant(
            data=mask_value.reshape((1, 1, self.seq_len, self.seq_len)))

    (extended_attention_mask, embedding_inputs,
     inputs) = self.build_input(input_shape)
    embedding = embedding_layer(embedding_inputs)
    hidden_size = embedding.get_output_shape()[-1]

    next_input = embedding
    output = [None] * n_block
    output[0] = self.block(next_input, hidden_size, extended_attention_mask)
    for index in range(n_block - 1):
        o = self.block(output[index], hidden_size, extended_attention_mask)
        output[index + 1] = o

    pooler_output = self.pooler(output[-1], hidden_size)

    if output_all_block:
        # list.append returns None, so append first and then build the
        # Model from the extended output list.
        output.append(pooler_output)
        model = Model(inputs, output)
    else:
        model = Model(inputs, [output[-1], pooler_output])
    self.value = model.value
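# Sketch (not part of the source): the causal mask built with np.tril above
# for the unidirectional case, shown for an illustrative seq_len of 4.
# Position i may only attend to positions <= i; the (1, 1, seq_len, seq_len)
# shape lets the constant broadcast over batch and head dimensions.
import numpy as np

seq_len = 4
mask_value = np.tril(np.ones((seq_len, seq_len), dtype="float32"))
mask_value = mask_value.reshape((1, 1, seq_len, seq_len))
# mask_value[0, 0]:
# [[1., 0., 0., 0.],
#  [1., 1., 0., 0.],
#  [1., 1., 1., 0.],
#  [1., 1., 1., 1.]]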
def to_model(self):
    from bigdl.dllib.keras.models import Model
    return Model.from_jvalue(
        callZooFunc(self.bigdl_type, "kerasNetToModel", self.value))
@classmethod
def init(cls, vocab=40990, hidden_size=768, n_block=12, n_head=12,
         seq_len=512, intermediate_size=3072, hidden_drop=0.1,
         attn_drop=0.1, initializer_range=0.02, output_all_block=True,
         bigdl_type="float"):
    """
    vocab: vocabulary size of training data, default is 40990
    hidden_size: size of the encoder layers, default is 768
    n_block: block number, default is 12
    n_head: head number, default is 12
    seq_len: max sequence length of training data, default is 512
    intermediate_size: size of the "intermediate" (i.e., feed-forward) layer, default is 3072
    hidden_drop: drop probability of fully connected layers, default is 0.1
    attn_drop: drop probability of attention, default is 0.1
    initializer_range: weight initialization range, default is 0.02
    output_all_block: whether to output all blocks' output, default is True
    """
    word_input = Input(shape=(seq_len, ))
    token_type_input = Input(shape=(seq_len, ))
    position_input = Input(shape=(seq_len, ))

    word_embedding = Embedding(vocab, hidden_size, input_length=seq_len,
                               weights=np.random.normal(
                                   0.0, initializer_range,
                                   (vocab, hidden_size)))(word_input)
    position_embedding = Embedding(
        seq_len, hidden_size, input_length=seq_len,
        weights=np.random.normal(0.0, initializer_range,
                                 (seq_len, hidden_size)))(position_input)
    token_type_embedding = Embedding(
        2, hidden_size, input_length=seq_len,
        weights=np.random.normal(0.0, initializer_range,
                                 (2, hidden_size)))(token_type_input)
    embedding = word_embedding + position_embedding + token_type_embedding

    w = auto.Parameter(shape=(1, hidden_size),
                       init_weight=np.ones((1, hidden_size),
                                           dtype=bigdl_type))
    b = auto.Parameter(shape=(1, hidden_size),
                       init_weight=np.zeros((1, hidden_size),
                                            dtype=bigdl_type))
    after_norm = layer_norm(embedding, w, b, 1e-12)
    h = Dropout(hidden_drop)(after_norm)

    embedding_layer = Model([word_input, token_type_input, position_input], h)
    shape = ((seq_len, ), (seq_len, ), (seq_len, ), (1, 1, seq_len))

    return BERT(n_block, n_head, intermediate_size, hidden_drop, attn_drop,
                initializer_range, output_all_block, embedding_layer,
                input_shape=shape)
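# Usage sketch (not part of the source; sizes are illustrative): a BERT
# layer built via init is applied to four inputs -- word ids, token type
# ids, position ids, and an attention mask of shape (1, 1, seq_len) --
# mirroring how TransformerLayer.init is used in the example below.
seq_len = 16
word_input = Input(shape=(seq_len, ))
token_type_input = Input(shape=(seq_len, ))
position_input = Input(shape=(seq_len, ))
mask_input = Input(shape=(1, 1, seq_len))
bert = BERT.init(vocab=1000, hidden_size=128, n_block=2, n_head=4,
                 seq_len=seq_len, intermediate_size=512,
                 output_all_block=False)
bert_output = bert([word_input, token_type_input, position_input, mask_input])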
def test_deprecated_save(self):
    with pytest.raises(Exception) as e_info:
        input = ZLayer.Input(shape=(5, ))
        output = ZLayer.Dense(10)(input)
        zmodel = ZModel(input, output, name="graph1")
        zmodel.save(create_tmp_path())
token_shape = (max_len, )
position_shape = (max_len, )
token_input = Input(shape=token_shape)
position_input = Input(shape=position_shape)
O_seq = TransformerLayer.init(vocab=max_features, hidden_size=128, n_head=8,
                              seq_len=max_len)([token_input, position_input])
# Select the first output of the Transformer. The second is the pooled output.
O_seq = SelectTable(0)(O_seq)
O_seq = GlobalAveragePooling1D()(O_seq)
O_seq = Dropout(0.2)(O_seq)
outputs = Dense(2, activation='softmax')(O_seq)

model = Model([token_input, position_input], outputs)
model.summary()

batch_size = 128

print('Train...')
est = Estimator.from_bigdl(model=model,
                           loss=SparseCategoricalCrossEntropy(),
                           optimizer=Adam(),
                           metrics=[Accuracy()])
est.fit(data=train_dataset,
        batch_size=batch_size,
        epochs=1)
print("Train finished.")

print('Evaluating...')
result = est.evaluate(val_dataset)
print(result)