def test_training():
    # Older mxnet.autograd context-manager names; the next example is the same
    # test written with the newer record()/pause() equivalents.
    x = nd.ones((10, 10))
    with train_section():
        y = nd.Dropout(x, p=0.5)
        assert not (y.asnumpy() == x.asnumpy()).all()
        with test_section():
            y = nd.Dropout(x, p=0.5)
            assert (y.asnumpy() == x.asnumpy()).all()
def test_training():
    x = nd.ones((10, 10))
    with record():
        y = nd.Dropout(x, p=0.5)
        assert not (y.asnumpy() == x.asnumpy()).all()
        with pause():
            y = nd.Dropout(x, p=0.5)
            assert (y.asnumpy() == x.asnumpy()).all()
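# The two tests above rely on nd.Dropout's default mode='training', where the op is
# only active while autograd is in training mode. A minimal sketch (not taken from any
# of the projects listed here) contrasting that default with mode='always', which the
# SequenceTagger.forward example further down uses:
from mxnet import nd

x = nd.ones((4, 4))

# Default mode='training': identity outside autograd.record()/train_mode().
y = nd.Dropout(x, p=0.5)
assert (y.asnumpy() == x.asnumpy()).all()

# mode='always': dropout fires regardless of the autograd state; kept elements are
# rescaled by 1/(1-p) (inverted dropout), so with p=0.5 the values are 0 or 2.
z = nd.Dropout(x, p=0.5, mode='always').asnumpy()
assert ((z == 0.) | (z == 2.)).all()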
def biLSTM(f_lstm, b_lstm, inputs, batch_size=None, dropout_x=0., dropout_h=0.): """Feature extraction through BiLSTM Parameters ---------- f_lstm : VariationalDropoutCell Forward cell b_lstm : VariationalDropoutCell Backward cell inputs : NDArray seq_len x batch_size dropout_x : float Variational dropout on inputs dropout_h : Not used Returns ------- outputs : NDArray Outputs of BiLSTM layers, seq_len x 2 hidden_dims x batch_size """ for f, b in zip(f_lstm, b_lstm): inputs = nd.Dropout(inputs, dropout_x, axes=[0]) # important for variational dropout fo, fs = f.unroll(length=inputs.shape[0], inputs=inputs, layout='TNC', merge_outputs=True) bo, bs = b.unroll(length=inputs.shape[0], inputs=inputs.flip(axis=0), layout='TNC', merge_outputs=True) f.reset(), b.reset() inputs = nd.concat(fo, bo.flip(axis=0), dim=2) return inputs
def forward(self, x):
    x = self.fc2(x)
    x = F.relu(x)
    x = F.Dropout(x)  # default p=0.5; F is presumably mxnet.ndarray here
    x = self.fc3(x)
    return x
def network(X, dropout=0.0):
    # encoder
    H1 = nd.Activation(nd.FullyConnected(data=X, weight=W1, bias=B1, num_hidden=num_hidden1), act_type="sigmoid")
    H1 = nd.Dropout(data=H1, p=dropout)  # apply dropout layer
    H2 = nd.Activation(nd.FullyConnected(data=H1, weight=W2, bias=B2, num_hidden=num_hidden2), act_type="sigmoid")
    H2 = nd.Dropout(data=H2, p=dropout)  # apply dropout layer
    # decoder
    H3 = nd.Activation(nd.FullyConnected(data=H2, weight=W3, bias=B3, num_hidden=num_hidden1_), act_type="sigmoid")
    H3 = nd.Dropout(data=H3, p=dropout)  # apply dropout layer
    H4 = nd.Activation(nd.FullyConnected(data=H3, weight=W4, bias=B4, num_hidden=num_hidden2_), act_type="sigmoid")
    H4 = nd.Dropout(data=H4, p=dropout)  # apply dropout layer
    H5 = nd.Activation(nd.FullyConnected(data=H4, weight=W5, bias=B5, num_hidden=num_outputs), act_type="sigmoid")
    out = H5
    return out
def hybrid_forward(self, F, x):
    x = self.conv_stem(x)
    x = self.bn1(x)
    x = self.act1(x)
    x = self.blocks(x)
    x = self.global_pool(x)
    x = self.conv_head(x)
    x = self.act2(x)
    if self.dropout > 0.:
        x = F.Dropout(x, p=self.dropout, mode='training')
    x = self.classifier(x)
    return x
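# In the hybrid_forward above, F is mxnet.ndarray or mxnet.symbol depending on whether
# the block has been hybridized, and F.Dropout(..., p=..., mode='training') exists in
# both namespaces. A minimal sketch of the same pattern with a hypothetical head block
# (not the model above):
from mxnet import nd
from mxnet.gluon import nn, HybridBlock

class DropoutHead(HybridBlock):
    def __init__(self, num_classes=10, dropout=0.2, **kwargs):
        super(DropoutHead, self).__init__(**kwargs)
        self._dropout = dropout
        with self.name_scope():
            self.classifier = nn.Dense(num_classes)

    def hybrid_forward(self, F, x):
        if self._dropout > 0.:
            # mode='training' (the default): active only under autograd training mode
            x = F.Dropout(x, p=self._dropout, mode='training')
        return self.classifier(x)

head = DropoutHead()
head.initialize()
head.hybridize()              # F becomes mxnet.symbol inside hybrid_forward
out = head(nd.ones((2, 32)))  # shape (2, 10)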
def network(X, drop_rate=0.0):
    # formula : output_size = ((input - kernel + 2*padding) / stride) + 1
    # data size
    # MNIST, FashionMNIST = (batch size, 1, 28, 28)
    # CIFAR = (batch size, 3, 32, 32)
    C_H1 = nd.Activation(data=nd.Convolution(data=X, weight=W1, bias=B1, kernel=(3, 3), stride=(1, 1), num_filter=60),
                         act_type="relu")
    # MNIST : result = (batch size, 60, 26, 26), CIFAR10 : result = (batch size, 60, 30, 30)
    P_H1 = nd.Pooling(data=C_H1, pool_type="max", kernel=(2, 2), stride=(2, 2))
    # MNIST : result = (batch size, 60, 13, 13), CIFAR10 : result = (batch size, 60, 15, 15)
    C_H2 = nd.Activation(data=nd.Convolution(data=P_H1, weight=W2, bias=B2, kernel=(6, 6), stride=(1, 1), num_filter=30),
                         act_type="relu")
    # MNIST : result = (batch size, 30, 8, 8), CIFAR10 : result = (batch size, 30, 10, 10)
    P_H2 = nd.Pooling(data=C_H2, pool_type="max", kernel=(2, 2), stride=(2, 2))
    # MNIST : result = (batch size, 30, 4, 4), CIFAR10 : result = (batch size, 30, 5, 5)
    P_H2 = nd.flatten(data=P_H2)
    '''FullyConnected parameters
    • data: (batch_size, input_dim)
    • weight: (num_hidden, input_dim)
    • bias: (num_hidden,)
    • out: (batch_size, num_hidden)
    '''
    F_H1 = nd.Activation(nd.FullyConnected(data=P_H2, weight=W3, bias=B3, num_hidden=120), act_type="sigmoid")
    F_H1 = nd.Dropout(data=F_H1, p=drop_rate)
    F_H2 = nd.Activation(nd.FullyConnected(data=F_H1, weight=W4, bias=B4, num_hidden=64), act_type="sigmoid")
    F_H2 = nd.Dropout(data=F_H2, p=drop_rate)
    softmax_Y = nd.softmax(nd.FullyConnected(data=F_H2, weight=W5, bias=B5, num_hidden=10))
    return softmax_Y
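# In hand-rolled networks like the one above, the dropout probability is threaded
# through as an argument, and because nd.Dropout defaults to mode='training' it only
# fires inside an autograd training scope. A hedged usage sketch (the weights W1..W5,
# B1..B5 are assumed to exist elsewhere; the batch below is hypothetical):
from mxnet import nd, autograd

data = nd.random.uniform(shape=(32, 1, 28, 28))  # hypothetical MNIST-sized batch

# training: dropout is active because we are recording in training mode
with autograd.record():
    out = network(data, drop_rate=0.2)

# evaluation: stay outside record() (and/or pass 0.0) so dropout is a no-op
pred = network(data, drop_rate=0.0)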
def biLSTM(f_lstm, b_lstm, inputs, batch_size=None, dropout_x=0., dropout_h=0.): """ Feature extraction through BiLSTM :param inputs: # seq_len x batch_size :param batch_size: :return: Outputs of BiLSTM layers, seq_len x 2 hidden_dims x batch_size """ for f, b in zip(f_lstm, b_lstm): inputs = nd.Dropout(inputs, dropout_x, axes=[0]) # important for variational dropout fo, fs = f.unroll(length=inputs.shape[0], inputs=inputs, layout='TNC', merge_outputs=True) bo, bs = b.unroll(length=inputs.shape[0], inputs=inputs.flip(axis=0), layout='TNC', merge_outputs=True) f.reset(), b.reset() inputs = nd.concat(fo, bo.flip(axis=0), dim=2) return inputs
def forward(self, sentences: List[Sentence], embed_ctx=None, dropout=None) -> Tuple[nd.NDArray, nd.NDArray, List]:
    """
    :param sentences:
    :return: features, tags, lengths
    """
    longest_token_sequence_in_batch = len(max(sentences, key=len))

    self.embeddings.embed(sentences, ctx=None if not embed_ctx else mx.cpu())

    all_sentence_tensors = []
    lengths = []
    tag_list = []

    padding = nd.zeros((1, self.embeddings.embedding_length), dtype='float32')

    for sentence in sentences:
        # get the tags in this sentence
        tag_idx = []

        lengths.append(len(sentence.tokens))

        word_embeddings = []
        for token in sentence:
            # get the tag
            tag_idx.append(self.tag_dictionary.get_idx_for_item(token.get_tag(self.tag_type)))
            # get the word embeddings
            embedding = token.get_embedding().reshape((1, -1))
            if embed_ctx:
                embedding = embedding.as_in_context(embed_ctx)
            word_embeddings.append(embedding)

        # pad shorter sentences out
        for add in range(longest_token_sequence_in_batch - len(sentence.tokens)):
            word_embeddings.append(padding)

        word_embeddings_tensor = nd.concat(*word_embeddings, dim=0)

        # if torch.cuda.is_available():
        #     tag_list.append(torch.cuda.LongTensor(tag_idx))
        # else:
        tag_list.append(nd.array(tag_idx))

        all_sentence_tensors.append(word_embeddings_tensor.expand_dims(1))

    # padded tensor for entire batch
    sentence_tensor = nd.concat(*all_sentence_tensors, dim=1)  # (IN, NN, C)
    # if torch.cuda.is_available():
    #     sentence_tensor = sentence_tensor.cuda()

    # --------------------------------------------------------------------
    # FF PART
    # --------------------------------------------------------------------
    sentence_tensor = self.dropout(sentence_tensor)

    if self.relearn_embeddings:
        sentence_tensor = self.embedding2nn(sentence_tensor)

    if self.use_rnn:
        # packed = torch.nn.utils.rnn.pack_padded_sequence(sentence_tensor, lengths)
        sentence_tensor = self.rnn(sentence_tensor)
        # sentence_tensor, output_lengths = torch.nn.utils.rnn.pad_packed_sequence(rnn_output)
        sentence_tensor = self.dropout(sentence_tensor)

    if dropout:
        sentence_tensor = nd.Dropout(sentence_tensor, dropout, mode='always')

    features = self.linear(sentence_tensor)

    tags = nd.zeros((len(tag_list), longest_token_sequence_in_batch), dtype='int32')
    for i, (t, l) in enumerate(zip(tag_list, lengths)):
        tags[i, :l] = t

    return features.transpose([1, 0, 2]), tags, lengths
def network(X, drop_rate=0.0):
    # formula : output_size = ((input - kernel + 2*padding) / stride) + 1
    # data size
    # MNIST, FashionMNIST = (batch size, 1, 28, 28)
    # CIFAR = (batch size, 3, 32, 32)
    # The built-in BatchNorm moving_mean / moving_var update does not work here,
    # so the running statistics are passed in explicitly with use_global_stats=True.
    C_H1 = nd.Activation(
        data=nd.BatchNorm(data=nd.Convolution(data=X, weight=W1, bias=B1, kernel=(3, 3), stride=(1, 1), num_filter=60),
                          gamma=gamma1, beta=beta1, moving_mean=ma1, moving_var=mv1,
                          momentum=0.9, fix_gamma=False, use_global_stats=True),
        act_type="relu")
    # MNIST : result = (batch size, 60, 26, 26), CIFAR10 : result = (batch size, 60, 30, 30)
    P_H1 = nd.Pooling(data=C_H1, pool_type="avg", kernel=(2, 2), stride=(2, 2))
    # MNIST : result = (batch size, 60, 13, 13), CIFAR10 : result = (batch size, 60, 15, 15)
    C_H2 = nd.Activation(
        data=nd.BatchNorm(data=nd.Convolution(data=P_H1, weight=W2, bias=B2, kernel=(6, 6), stride=(1, 1), num_filter=30),
                          gamma=gamma2, beta=beta2, moving_mean=ma2, moving_var=mv2,
                          momentum=0.9, fix_gamma=False, use_global_stats=True),
        act_type="relu")
    # MNIST : result = (batch size, 30, 8, 8), CIFAR10 : result = (batch size, 30, 10, 10)
    P_H2 = nd.Pooling(data=C_H2, pool_type="avg", kernel=(2, 2), stride=(2, 2))
    # MNIST : result = (batch size, 30, 4, 4), CIFAR10 : result = (batch size, 30, 5, 5)
    P_H2 = nd.flatten(data=P_H2)
    '''FullyConnected parameters
    • data: (batch_size, input_dim)
    • weight: (num_hidden, input_dim)
    • bias: (num_hidden,)
    • out: (batch_size, num_hidden)
    '''
    F_H1 = nd.Activation(nd.BatchNorm(data=nd.FullyConnected(data=P_H2, weight=W3, bias=B3, num_hidden=120),
                                      gamma=gamma3, beta=beta3, moving_mean=ma3, moving_var=mv3,
                                      momentum=0.9, fix_gamma=False, use_global_stats=True),
                         act_type="relu")
    F_H1 = nd.Dropout(data=F_H1, p=drop_rate)
    F_H2 = nd.Activation(nd.BatchNorm(data=nd.FullyConnected(data=F_H1, weight=W4, bias=B4, num_hidden=64),
                                      gamma=gamma4, beta=beta4, moving_mean=ma4, moving_var=mv4,
                                      momentum=0.9, fix_gamma=False, use_global_stats=True),
                         act_type="relu")
    F_H2 = nd.Dropout(data=F_H2, p=drop_rate)
    # softmax_Y = nd.softmax(nd.FullyConnected(data=F_H2, weight=W5, bias=B5, num_hidden=10))
    out = nd.FullyConnected(data=F_H2, weight=W5, bias=B5, num_hidden=10)
    return out