def _to_zoo_input(self, input, is_constant=None):
    # Weights listed in the ONNX initializer map become trainable parameters.
    is_parameter = input.name in self._initializer
    # Already-converted inputs can be returned as-is.
    if isinstance(input.zvalue, (zautograd.Variable, zautograd.Parameter)):
        return input
    if isinstance(input.zvalue, np.ndarray):
        # Parameters and constants keep their full shape; ordinary graph
        # inputs drop the leading batch dimension.
        if is_parameter or is_constant:
            shape = input.zvalue.shape
        else:
            shape = input.zvalue.shape[1:]
    elif isinstance(input.zvalue, list):
        if is_parameter or is_constant:
            shape = input.zvalue
        else:
            shape = input.zvalue[1:]
    else:
        raise Exception("unsupported type " + str(type(input.zvalue)))
    input.data = input.zvalue
    if is_constant:
        # Constants become frozen (non-trainable) parameters.
        input.zvalue = zautograd.Parameter(shape=shape,
                                           init_weight=input.zvalue,
                                           trainable=False)
    elif is_parameter:
        # Initializer weights become trainable parameters.
        input.zvalue = zautograd.Parameter(shape=shape,
                                           init_weight=input.zvalue)
    else:
        # Everything else is a placeholder fed at runtime.
        input.zvalue = zlayers.Input(shape=shape, name=input.name)
    return input
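For orientation, here is a minimal sketch of the three outcomes of this conversion, written against the same zoo calls the method uses; the import aliases, array, and names are illustrative, not part of the loader.

import numpy as np
import zoo.pipeline.api.autograd as zautograd  # aliases assumed to match this module
import zoo.pipeline.api.keras.layers as zlayers

weight = np.ones((3, 2), dtype="float32")

# ONNX constant -> frozen parameter (full shape kept).
frozen = zautograd.Parameter(shape=(3, 2), init_weight=weight, trainable=False)

# Initializer weight -> trainable parameter (full shape kept).
trainable = zautograd.Parameter(shape=(3, 2), init_weight=weight)

# Anything else -> graph input; the leading batch dimension is dropped.
placeholder = zlayers.Input(shape=(2,), name="x")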
def test_parameter_create(self):
    w = auto.Parameter(shape=(3, 2))
    # Round-trip the weight to exercise the getter and setter.
    value = w.get_weight()
    w.set_weight(value)
    x = auto.Variable(input_shape=(3,))
    b = auto.Parameter(shape=(2,))
    # Affine transform: contract axis 1 of x with axis 0 of w, then add the bias.
    out = auto.mm(x, w, axes=(1, 0)) + b
    model = Model(input=x, output=out)
    input_data = np.random.uniform(0, 1, (4, 3))
    model.forward(input_data)
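As a continuation of the test above, a hedged sanity check of the forward pass, assuming mm with axes=(1, 0) is an ordinary matrix product and that forward returns a NumPy array; the tolerance is illustrative.

import numpy as np

# Illustrative check (not part of the test): the graph computes x.dot(w) + b,
# so the forward output should match the NumPy equivalent.
w_value = w.get_weight()   # (3, 2), as round-tripped above
b_value = b.get_weight()   # (2,)
expected = input_data.dot(w_value) + b_value
output = model.forward(input_data)
np.testing.assert_allclose(output, expected, rtol=1e-5)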
def block(self, x, size):
    # Per-block layer-norm parameters: gains initialized to ones, biases to zeros.
    g = auto.Parameter(shape=(1, size),
                       init_weight=np.ones((1, size), dtype=self.bigdl_type))
    b = auto.Parameter(shape=(1, size),
                       init_weight=np.zeros((1, size), dtype=self.bigdl_type))
    g2 = auto.Parameter(shape=(1, size),
                        init_weight=np.ones((1, size), dtype=self.bigdl_type))
    b2 = auto.Parameter(shape=(1, size),
                        init_weight=np.zeros((1, size), dtype=self.bigdl_type))
    # Post-norm transformer block: attention and MLP sub-layers, each wrapped
    # in a residual connection followed by layer normalization.
    a = self.multi_head_self_attention(x, size)
    n = self.layer_norm(x + a, w=g, b=b)
    m = self.mlp(n, size)
    h = self.layer_norm(n + m, w=g2, b=b2)
    return h
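The layer_norm helper these blocks call is not shown here; below is a minimal NumPy sketch of the standard formula it is assumed to implement: normalize over the feature axis, then apply the learned gain w and bias b.

import numpy as np

def layer_norm_ref(x, w, b, e=1e-5):
    # Standard layer norm: normalize each row to zero mean / unit variance
    # over the last axis, then scale by w and shift by b (both broadcastable).
    u = x.mean(axis=-1, keepdims=True)
    s = x.var(axis=-1, keepdims=True)
    return (x - u) / np.sqrt(s + e) * w + b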
def block(self, x, size, attention_mask=None, epsilon=1e-5):
    g = auto.Parameter(shape=(1, size),
                       init_weight=np.ones((1, size), dtype=self.bigdl_type))
    b = auto.Parameter(shape=(1, size),
                       init_weight=np.zeros((1, size), dtype=self.bigdl_type))
    g2 = auto.Parameter(shape=(1, size),
                        init_weight=np.ones((1, size), dtype=self.bigdl_type))
    b2 = auto.Parameter(shape=(1, size),
                        init_weight=np.zeros((1, size), dtype=self.bigdl_type))
    # Same post-norm layout as above, with an optional attention mask
    # and a configurable layer-norm epsilon.
    a = self.multi_head_self_attention(x, size, attention_mask)
    n = layer_norm(x + a, w=g, b=b, e=epsilon)
    m = self.mlp(n, size)
    h = layer_norm(n + m, w=g2, b=b2, e=epsilon)
    return h
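The attention_mask is the usual additive mask: padded positions receive a large negative value before the softmax so they end up with near-zero attention weight. A self-contained NumPy sketch of this convention, assuming a 0/1 mask broadcastable against the attention logits (as in the (1, 1, seq_len) mask shape declared for BERT below):

import numpy as np

def masked_softmax_ref(scores, attention_mask):
    # scores: raw attention logits, shape (..., seq_len)
    # attention_mask: 1.0 for tokens to keep, 0.0 for padding.
    scores = scores + (1.0 - attention_mask) * -10000.0
    e = np.exp(scores - scores.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)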
@classmethod
def init(cls, vocab=40990, hidden_size=768, n_block=12, n_head=12,
         seq_len=512, intermediate_size=3072, hidden_drop=0.1,
         attn_drop=0.1, initializer_range=0.02, output_all_block=True,
         bigdl_type="float"):
    """
    vocab: vocabulary size of training data, default is 40990
    hidden_size: size of the encoder layers, default is 768
    n_block: block number, default is 12
    n_head: head number, default is 12
    seq_len: max sequence length of training data, default is 512
    intermediate_size: size of the "intermediate" (i.e. feed-forward) layer, default is 3072
    hidden_drop: drop probability of fully connected layers, default is 0.1
    attn_drop: drop probability of attention, default is 0.1
    initializer_range: weight initialization range, default is 0.02
    output_all_block: whether to output every block's output, default is True
    """
    # Three token-level inputs: word ids, segment (token type) ids and positions.
    word_input = Input(shape=(seq_len,))
    token_type_input = Input(shape=(seq_len,))
    position_input = Input(shape=(seq_len,))
    # Embedding tables are initialized from N(0, initializer_range).
    word_embedding = Embedding(vocab, hidden_size, input_length=seq_len,
                               weights=np.random.normal(0.0, initializer_range,
                                                        (vocab, hidden_size)))(word_input)
    position_embedding = Embedding(seq_len, hidden_size, input_length=seq_len,
                                   weights=np.random.normal(0.0, initializer_range,
                                                            (seq_len, hidden_size)))(position_input)
    token_type_embedding = Embedding(2, hidden_size, input_length=seq_len,
                                     weights=np.random.normal(0.0, initializer_range,
                                                              (2, hidden_size)))(token_type_input)
    # Sum the three embeddings, then apply layer norm and dropout.
    embedding = word_embedding + position_embedding + token_type_embedding
    w = auto.Parameter(shape=(1, hidden_size),
                       init_weight=np.ones((1, hidden_size), dtype=bigdl_type))
    b = auto.Parameter(shape=(1, hidden_size),
                       init_weight=np.zeros((1, hidden_size), dtype=bigdl_type))
    after_norm = layer_norm(embedding, w, b, 1e-12)
    h = Dropout(hidden_drop)(after_norm)
    embedding_layer = Model([word_input, token_type_input, position_input], h)
    # Per-sample input shapes: word ids, token type ids, positions, attention mask.
    shape = ((seq_len,), (seq_len,), (seq_len,), (1, 1, seq_len))
    return BERT(n_block, n_head, intermediate_size, hidden_drop, attn_drop,
                initializer_range, output_all_block, embedding_layer,
                input_shape=shape)
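A hedged usage sketch of this constructor: the input values are illustrative, batch dimensions are prepended to the per-sample shapes declared above, and it assumes forward accepts a list of NumPy arrays for a multi-input model; index conventions (0- or 1-based ids) follow whatever the underlying Embedding layer expects.

import numpy as np

seq_len = 512
model = BERT.init(seq_len=seq_len)

word_ids = np.random.randint(0, 40990, (1, seq_len))       # illustrative token ids
token_type_ids = np.zeros((1, seq_len))                    # single-segment input
positions = np.arange(seq_len).reshape(1, seq_len)         # 0..seq_len-1
attention_mask = np.ones((1, 1, 1, seq_len))               # no padding

outputs = model.forward([word_ids, token_type_ids, positions, attention_mask])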