def train(self, lr, num_epochs):
    """Train a pair of embedding layers on ``self.data_iter``.

    A fresh ``nn.Sequential`` holding two ``nn.Embedding`` layers (both of
    size ``len(self.idx_to_token) x self.embed_size``) is created, stored on
    ``self.net`` and re-initialised on every call, so previous weights are
    discarded. Each batch is scored with ``self.skip_gram`` and optimised
    with Adam against a masked sigmoid binary cross-entropy loss. One
    summary line (mean loss and wall-clock time) is printed per epoch.

    Args:
        lr: Learning rate passed to the Adam trainer.
        num_epochs: Number of full passes over ``self.data_iter``.
    """
    ctx = d2l.try_gpu()
    embed_size = self.embed_size
    vocab_size = len(self.idx_to_token)

    # net[0] and net[1] are handed to self.skip_gram as its two embeddings.
    self.net = nn.Sequential()
    self.net.add(
        nn.Embedding(input_dim=vocab_size, output_dim=embed_size),
        nn.Embedding(input_dim=vocab_size, output_dim=embed_size))
    self.net.initialize(ctx=ctx, force_reinit=True)

    trainer = gluon.Trainer(self.net.collect_params(), 'adam',
                            {'learning_rate': lr})
    loss = gloss.SigmoidBinaryCrossEntropyLoss()

    for epoch in range(num_epochs):
        start, l_sum, n = time.time(), 0.0, 0
        for batch in self.data_iter:
            center, context_negative, mask, label = [
                data.as_in_context(ctx) for data in batch
            ]
            with autograd.record():
                pred = self.skip_gram(center, context_negative,
                                      self.net[0], self.net[1])
                # The mask is used as the per-element loss weight; the
                # result is then rescaled by (row length / number of
                # unmasked entries) for each example.
                l = (loss(pred.reshape(label.shape), label, mask) *
                     mask.shape[1] / mask.sum(axis=1))
            l.backward()
            trainer.step(self.batch_size)
            # Accumulate the batch loss itself. The previous code added
            # l_sum to l_sum, so the reported loss was always 0.
            l_sum += l.sum().asscalar()
            n += l.size
        # Avoid ZeroDivisionError when the iterator yielded no batches.
        mean_loss = l_sum / n if n else float('nan')
        print('epoch %d, loss %.2f, time %.2fs' %
              (epoch + 1, mean_loss, time.time() - start))
lines = [[vocab.bos] + line + [vocab.eos] for line in lines] array = torch.tensor([pad(line, max_len, vocab.pad) for line in lines]) valid_len = (array != vocab.pad).sum(1) return array, valid_len src_vocab, tgt_vocab = build_vocab(source), build_vocab(target) src_array, src_valid_len = build_array(source, src_vocab, max_len, True) tgt_array, tgt_valid_len = build_array(target, tgt_vocab, max_len, False) train_data = data.TensorDataset(src_array, src_valid_len, tgt_array, tgt_valid_len) train_iter = data.DataLoader(train_data, batch_size, shuffle=True) return src_vocab, tgt_vocab, train_iter embed_size, num_hiddens, num_layers, dropout = 32, 32, 2, 0.0 batch_size, num_steps = 64, 10 lr, num_epochs, ctx = 0.005, 500, d2l.try_gpu() src_vocab, tgt_vocab, train_iter = load_data_nmt(batch_size, num_steps) encoder = d2l.Seq2SeqEncoder( len(src_vocab), embed_size, num_hiddens, num_layers, dropout) decoder = Seq2SeqAttentionDecoder( len(tgt_vocab), embed_size, num_hiddens, num_layers, dropout) model = d2l.EncoderDecoder(encoder, decoder) #训练和预测 d2l.train_s2s_ch9(model, train_iter, lr, num_epochs, ctx)
print('output anchors:', anchors.shape) print('output class preds:', cls_preds.shape) print('output bbox preds:', bbox_preds.shape) # %% [markdown] # ## Training # # ### Data Reading and Initialization # # %% batch_size = 32 train_iter, _ = d2l.load_data_pikachu(batch_size) ctx, net = d2l.try_gpu(), TinySSD(num_classes=1) net.initialize(init=init.Xavier(), ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', { 'learning_rate': 0.2, 'wd': 5e-4 }) # %% [markdown] # ### Define Losses # %% cls_loss = gloss.SoftmaxCrossEntropyLoss() bbox_loss = gloss.L1Loss() def calc_loss(cls_preds, cls_labels, bbox_preds, bbox_labels, bbox_masks):
# Standard-library and third-party packages used by this script
import sys
import time

import mxnet as mx
from mxnet import nd, init, gluon, autograd, image
from mxnet.gluon import data as gdata, loss as gloss, nn
import numpy as np
import d2l
import matplotlib.pyplot as plt

# The DenseVAE model lives in ./models, so that directory has to be on the
# import path before the model can be imported
sys.path.insert(0, "./models")
from DenseVAE import DenseVAE

# Device on which all arrays below are placed
CTX = d2l.try_gpu()

# Load the first array stored in the file onto CTX and shuffle it
all_features = nd.shuffle(
    nd.load('../project_data/anime_faces.ndy')[0].as_in_context(CTX))

# The anime faces have no particular order, so the first 80% of the
# shuffled samples become the training set and the remainder the test set
n_train = int(all_features.shape[0] * 0.8)
train_features, test_features = all_features[0:n_train], all_features[n_train:]

# Training data iterator; the last, possibly smaller, batch is kept
batch_size = 64
train_iter = gdata.DataLoader(train_features,
                              batch_size,
                              shuffle=True,
                              last_batch='keep')
# Extract the training image's shape
dropout, i)) self.dense = nn.Dense(vocab_size, flatten=False) def init_state(self, enc_outputs, env_valid_len, *args): return [enc_outputs, env_valid_len, [None]*self.num_layers] def forward(self, X, state): X = self.pos_encoding(self.embedding(X) * math.sqrt(self.num_hiddens)) for blk in self.blks: X, state = blk(X, state) return self.dense(X), state num_hiddens, num_layers, dropout, batch_size, num_steps = 32, 2, 0.0, 64, 10 # ORG - lr, num_epochs, ctx = 0.005, 100, d2l.try_gpu() lr, num_epochs, ctx = 0.005, 100, d2l.try_gpu() # TEST ffn_num_hiddens, num_heads = 64, 4 # ORG src_vocab, tgt_vocab, train_iter = d2l.load_data_nmt(batch_size, num_steps) # ToDo : Switch the data from query """ def ORG_load_data_nmt(batch_size, num_steps, num_examples=1000): text = preprocess_nmt(read_data_nmt()) source, target = tokenize_nmt(text, num_examples) src_vocab = d2l.Vocab(source, min_freq=3, reserved_tokens=['<pad>', '<bos>', '<eos>']) tgt_vocab = d2l.Vocab(target, min_freq=3, reserved_tokens=['<pad>', '<bos>', '<eos>']) src_array, src_valid_len = build_array(source, src_vocab, num_steps, True) tgt_array, tgt_valid_len = build_array(target, tgt_vocab, num_steps, False) data_arrays = (src_array, src_valid_len, tgt_array, tgt_valid_len) data_iter = d2l.load_array(data_arrays, batch_size)
test_iter = gluon.data.DataLoader( test_set, batch_size=batch_size) return num_users, num_items, train_iter, test_iter if __name__ == "__main__": d2l.DATA_HUB['ml-100k'] = ( 'http://files.grouplens.org/datasets/movielens/ml-100k.zip', 'cd4dcac4241c8a4ad7badc7ca635da8a69dddb83') ctx = d2l.try_all_gpus() num_users, num_items, train_iter, test_iter = split_and_load_ml100k( test_ratio=0.1, batch_size=512) net = MF(30, num_users, num_items) net.initialize(ctx=ctx, force_reinit=True, init=mx.init.Normal(0.01)) # wd: weight decay. The weight decay mechanism has the same effect as the l2 regularization lr, num_epochs, wd, optimizer = 0.002, 20, 1e-5, 'adam' loss = gluon.loss.L2Loss() trainer = gluon.Trainer(net.collect_params(), optimizer, {"learning_rate": lr, 'wd': wd}) train_recsys_rating(net, train_iter, test_iter, loss, trainer, num_epochs, ctx, evaluator) scores = net(np.array([20], dtype='int', ctx=d2l.try_gpu()), np.array([30], dtype='int', ctx=d2l.try_gpu())) scores
x = self.flat(x) return x class Flatten(nn.Module): def forward(self, input): return input.view(input.size(0), -1) net = Net() X = torch.rand(size=(1, 1, 224, 224)) for layer in net.children(): X = layer(X) print(layer.__class__.__name__, 'output shape:\t', X.shape) lr, num_epochs, batch_size, device = 0.1, 5, 128, d2l.try_gpu() #Xavier initialization of weights def init_weights(m): if type(m) == nn.Linear or type(m) == nn.Conv2d: torch.nn.init.xavier_uniform_(m.weight) net.apply(init_weights) #Loading fashion-MNIST data train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224) #criterion criterion = nn.CrossEntropyLoss()
strides=2)) else: blk.append(Residual(num_channels, num_channels)) return blk class Flatten(nn.Module): def forward(self, input): return input.view(input.size(0), -1) b2 = nn.Sequential(*resnet_block(64, 64, 2, first_block=True)) b3 = nn.Sequential(*resnet_block(64, 128, 2)) b4 = nn.Sequential(*resnet_block(128, 256, 2)) b5 = nn.Sequential(*resnet_block(256, 512, 2)) net = nn.Sequential(b1, b2, b3, b4, b5, nn.AdaptiveMaxPool2d((1, 1)), Flatten(), nn.Linear(512, 10)) lr, num_epochs, batch_size, device = 0.05, 5, 256, d2l.try_gpu() def init_weights(m): if type(m) == nn.Linear or type(m) == nn.Conv2d: torch.nn.init.xavier_uniform_(m.weight) net.apply(init_weights) criterion = nn.CrossEntropyLoss() train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96) d2l.train_ch5(net, train_iter, test_iter, criterion, num_epochs, batch_size, device, lr)
corpus_indices, vocab = d2l.load_data_time_machine() F.one_hot(torch.Tensor([0, 2]).long(), len(vocab)) def to_onehot(X, size): return F.one_hot(X.long().transpose(0, -1), size) X = torch.arange(10).reshape((2, 5)) inputs = to_onehot(X, len(vocab)) len(inputs), inputs[0].shape num_inputs, num_hiddens, num_outputs = len(vocab), 512, len(vocab) ctx = d2l.try_gpu() print('Using', ctx) # Create the parameters of the model, initialize them and attach gradients def get_params(): def _one(shape): return torch.Tensor(size=shape, device=ctx).normal_(std=0.01) # Hidden layer parameters W_xh = _one((num_inputs, num_hiddens)) W_hh = _one((num_hiddens, num_hiddens)) b_h = torch.zeros(num_hiddens, device=ctx) # Output layer parameters W_hq = _one((num_hiddens, num_outputs)) b_q = torch.zeros(num_outputs, device=ctx)
trainer.set_learning_rate(trainer.learning_rate * 0.1) # if epoch%10 == 0: # print(epoch) if epoch % 10 == 0: # animator.axes[1].imshow(postprocess(X).asnumpy()) animator.add(epoch, [nd.add_n(*contents_l).asscalar(), nd.add_n(*styles_l).asscalar(), tv_l.asscalar()]) if epoch % 100 == 0: d2l.plt.imsave('neural-style'+str(epoch)+'.png', postprocess(X).asnumpy()) return X d2l.try_gpu() ctx, image_shape = d2l.try_gpu(), (120, 200) net.collect_params().reset_ctx(ctx) content_X, contents_Y = get_contents(image_shape, ctx) _, styles_Y = get_styles(image_shape, ctx) output = train(content_X, contents_Y, styles_Y, ctx, 0.01, 200, 200) output1 = postprocess(output) output1 = output1.asnumpy() matplotlib.image.imsave('name1.png', output1) ctx, image_shape = d2l.try_gpu(), (758, 948) _, content_Y = get_contents(image_shape, ctx) _, style_Y = get_styles(image_shape, ctx) X = preprocess(postprocess(output) * 255, image_shape) output = train(X, content_Y, style_Y, ctx, 0.01, 300, 300)
nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=3, stride=2), Flatten(), nn.Dropout(p=0.5, inplace=True), nn.Linear(in_features=6400, out_features=4096), nn.ReLU(), nn.Dropout2d(p=0.5, inplace=True), nn.Linear(in_features=4096, out_features=4096), nn.ReLU(), nn.Linear(in_features=4096, out_features=10)) X = torch.randn(size=(1, 1, 224, 224)) for layer in net: X = layer(X) print(layer.__class__.__name__, 'Output shape:\t', X.shape) batch_size = 128 train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224) lr, num_epochs, device = 0.01, 5, d2l.try_gpu() def init_weights(m): if type(m) == nn.Linear or type(m) == nn.Conv2d: torch.nn.init.xavier_uniform_(m.weight) net.apply(init_weights) optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay=0.0005) criterion = nn.CrossEntropyLoss() d2l.train_ch5(net, train_iter, test_iter, criterion, num_epochs, batch_size, device, lr)
def _open_results_file(results_dir, filename, state_prefix, error_message):
    """Open ``results_dir + filename`` for writing, else ``./filename``.

    Prints ``state_prefix`` followed by the path on success. If the path
    cannot be built or opened, prints ``error_message`` and opens the file
    in the current directory instead.
    """
    try:
        path = results_dir + filename
        handle = open(path, 'w')
    except Exception:
        print(error_message)
        return open('./' + filename, 'w')
    print(state_prefix + path)
    return handle


def train_VAE_GAN(vae_net,
                  disc_net,
                  train_features,
                  test_features,
                  test_results_dir,
                  vae_parameters_path=None,
                  batch_size=64,
                  init_lr=0.001,
                  pbp_weight=1,
                  disc_loss_mul=10,
                  n_epochs=200,
                  n_solo_epochs=0,
                  max_disc_loss=999,
                  variable_pbp_weight='constant',
                  pbp_weight_decay=1,
                  CTX=d2l.try_gpu()):
    """Train a VAE together with a discriminator and write a report.

    The function re-initialises both networks, trains the VAE alone for
    ``n_solo_epochs`` epochs (with its ``pbp_weight`` forced to 1), then
    alternates discriminator and VAE updates for the remaining
    ``n_epochs - n_solo_epochs`` "combo" epochs. It writes
    ``training_statistics.csv`` and ``README.md``, saves the VAE
    parameters, and saves one PNG per generated validation image.

    Args:
        vae_net: VAE network. Calling it on a batch must return its loss.
            It must also provide ``generate(batch)`` and the attributes
            ``pbp_weight``, ``n_latent``, ``n_base_channels``,
            ``out_width``, ``out_height`` and ``n_channels``.
        disc_net: Discriminator network returning logits; it is trained
            with ``SigmoidBinaryCrossEntropyLoss(from_sigmoid=False)``.
        train_features: Training samples, indexed along the first axis.
        test_features: Samples from which the first (up to) 10 are used to
            generate validation images.
        test_results_dir: Output directory prefix. It is concatenated
            directly with file names, so it must end with a slash. When it
            is unusable, output falls back to the current directory.
        vae_parameters_path: Full path (directory + file name) for the
            trained VAE parameters. If saving there fails, the parameters
            are saved to ``./recent_model.params``.
        batch_size: Batch size of the training iterator; the last batch of
            an epoch may be smaller.
        init_lr: Learning rate of both Adam trainers.
        pbp_weight: Pixel-by-pixel loss weight assigned to
            ``vae_net.pbp_weight`` for the combo epochs.
        disc_loss_mul: Multiplier applied to the discriminator loss inside
            the VAE (generator) loss.
        n_epochs: Total number of epochs (solo + combo).
        n_solo_epochs: Number of epochs trained without the discriminator;
            should not exceed ``n_epochs``.
        max_disc_loss: If an epoch's mean discriminator loss exceeds this
            value, the discriminator term is left out of the VAE loss in
            the following epoch. The discriminator itself keeps training.
        variable_pbp_weight: ``'decay'`` multiplies ``vae_net.pbp_weight``
            by ``pbp_weight_decay`` whenever ``(epoch + 1) % 25 == 0``
            during the combo epochs. Any other value (the default is
            ``'constant'``) keeps the weight fixed.
        pbp_weight_decay: Decay factor used when ``variable_pbp_weight``
            is ``'decay'``.
        CTX: Device context. NOTE: the default is evaluated once, when
            the function is defined.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    ###########################################################################
    ## Model initialization and trainers
    ###########################################################################
    print(
        '[STATE]: Initializing model parameters and constructing Gluon trainers'
    )
    vae_net.pbp_weight = pbp_weight
    vae_net.collect_params().initialize(mx.init.Xavier(),
                                        force_reinit=True,
                                        ctx=CTX)
    vae_trainer = gluon.Trainer(vae_net.collect_params(), 'adam',
                                {'learning_rate': init_lr})

    disc_net.collect_params().initialize(mx.init.Xavier(),
                                         force_reinit=True,
                                         ctx=CTX)
    disc_trainer = gluon.Trainer(disc_net.collect_params(), 'adam',
                                 {'learning_rate': init_lr})

    n_combo_epochs = n_epochs - n_solo_epochs

    csv_writer = None
    readme_writer = None
    try:
        #######################################################################
        ## Output file writers
        #######################################################################
        # Both writers go through the same helper. The README fallback used
        # to assign its file to csv_writer, which left readme_writer as None
        # and replaced the CSV handle.
        csv_writer = _open_results_file(
            test_results_dir, 'training_statistics.csv',
            '[STATE]: Writing training statistics to ',
            '[ERROR]: test results directory not valid, writing training statistics to main directory'
        )
        # The CSV file starts with a header row naming the columns
        csv_writer.write('epoch,vae_loss,disc_loss,time_consumed\n')

        readme_writer = _open_results_file(
            test_results_dir, 'README.md',
            '[STATE]: Writing README report to ',
            '[ERROR]: test results directory not valid, writing readme to main directory'
        )

        # Record the hyper parameters at the top of the README
        readme_writer.write('n_latent:{} \n\n'.format(vae_net.n_latent))
        readme_writer.write('n_base_channels:{} \n\n'.format(
            vae_net.n_base_channels))
        if variable_pbp_weight == 'constant':
            readme_writer.write('pixel-by-pixel loss weight:{} \n\n'.format(
                vae_net.pbp_weight))
        elif variable_pbp_weight == 'decay':
            readme_writer.write(
                'pixel-by-pixel loss weight initially {} and decay by {} every 25 combo epochs \n\n'
                .format(vae_net.pbp_weight, pbp_weight_decay))
        readme_writer.write('n_solo_epochs:{} \n\n'.format(n_solo_epochs))
        readme_writer.write('n_combo_epochs:{} \n\n'.format(n_combo_epochs))
        readme_writer.write('max_disc_loss :{} \n\n'.format(max_disc_loss))

        #######################################################################
        ## Data iterator
        #######################################################################
        train_iter = gdata.DataLoader(train_features,
                                      batch_size,
                                      shuffle=True,
                                      last_batch='keep')
        sample_size = train_features.shape[0]
        print('[STATE]: {} training samples loaded into iterator'.format(
            sample_size))
        print(
            '[STATE]: {} solo epochs and {} combo epochs are to be trained'.
            format(n_solo_epochs, n_combo_epochs))

        #######################################################################
        ## Training: solo epochs (VAE only)
        #######################################################################
        print('[STATE]: Training started')

        # The solo rounds always use a PBP weight of 1; remember the
        # requested weight so it can be restored afterwards.
        specified_pbp_weight = vae_net.pbp_weight
        vae_net.pbp_weight = 1
        for epoch in range(n_solo_epochs):
            batch_losses = []
            epoch_start_time = time.time()

            # Iterate through the batches
            for batch_features in train_iter:
                batch_features = batch_features.as_in_context(CTX)

                with autograd.record():
                    loss = vae_net(batch_features)
                loss.backward()
                vae_trainer.step(batch_features.shape[0])

                batch_losses.append(nd.mean(loss).asscalar())

            # The epoch loss is the mean of the per-batch mean losses
            epoch_train_loss = np.mean(batch_losses)
            time_consumed = time.time() - epoch_start_time

            epoch_report_str = 'Epoch{}, Training loss {:.10f}, Time used {:.2f}'.format(
                epoch, epoch_train_loss, time_consumed)
            readme_writer.write(epoch_report_str + '\n\n')
            print('[STATE]: ' + epoch_report_str)

        vae_net.pbp_weight = specified_pbp_weight

        #######################################################################
        ## Training: combo epochs (discriminator + VAE)
        #######################################################################
        disc_loss_func = gloss.SigmoidBinaryCrossEntropyLoss(
            from_sigmoid=False)

        # 1 while the discriminator term is included in the VAE loss; set
        # to 0 for the next epoch when the discriminator's mean loss
        # exceeded max_disc_loss.
        use_disc_loss = 1
        for epoch in range(n_solo_epochs, n_epochs):
            start_time = time.time()
            vae_batch_losses = []
            disc_batch_losses = []

            for batch_features in train_iter:
                batch_features = batch_features.as_in_context(CTX)
                # The last batch may be smaller than batch_size
                act_batch_size = batch_features.shape[0]

                # Labels: 1 for genuine images, 0 for generated images
                genuine_labels = nd.ones((act_batch_size, ), ctx=CTX)
                generated_labels = nd.zeros((act_batch_size, ), ctx=CTX)

                # ---- Update the discriminator network ----
                with autograd.record():
                    genuine_logit_preds = disc_net(batch_features)
                    genuine_loss = disc_loss_func(genuine_logit_preds,
                                                  genuine_labels)

                    generated_features = vae_net.generate(batch_features)
                    generated_logit_preds = disc_net(generated_features)
                    generated_loss = disc_loss_func(generated_logit_preds,
                                                    generated_labels)

                    disc_loss = genuine_loss + generated_loss
                disc_loss.backward()
                disc_batch_losses.append(nd.mean(disc_loss).asscalar())
                disc_trainer.step(act_batch_size)

                # ---- Update the VAE network ----
                with autograd.record():
                    # The VAE is rewarded when the discriminator labels its
                    # generated images as genuine.
                    generated_features = vae_net.generate(batch_features)
                    generated_logit_preds = disc_net(generated_features)
                    batch_disc_loss = disc_loss_func(generated_logit_preds,
                                                     genuine_labels)

                    # VAE loss plus the scaled discriminator loss; the
                    # latter is switched off when use_disc_loss is 0.
                    gen_loss = vae_net(
                        batch_features
                    ) + batch_disc_loss * disc_loss_mul * use_disc_loss
                gen_loss.backward()
                vae_batch_losses.append(nd.mean(gen_loss).asscalar())
                vae_trainer.step(act_batch_size)

            # ---- End of this epoch: summary metrics ----
            time_consumed = time.time() - start_time
            epoch_disc_train_loss = np.mean(disc_batch_losses)
            epoch_vae_train_loss = np.mean(vae_batch_losses)

            if variable_pbp_weight == 'decay' and (1 + epoch) % 25 == 0:
                vae_net.pbp_weight = vae_net.pbp_weight * pbp_weight_decay
                print('VAE PBP weight adjusted to {:.10f}'.format(
                    vae_net.pbp_weight))

            # The discriminator keeps training in the next epoch either
            # way; this only decides whether its loss feeds the VAE update.
            use_disc_loss = 1 if epoch_disc_train_loss <= max_disc_loss else 0

            epoch_README_report = 'Epoch{}, VAE Training loss {:.5f}, ResNet Training loss {:.10f}, Time used {:.2f}'
            epoch_README_report = epoch_README_report.format(
                epoch, epoch_vae_train_loss, epoch_disc_train_loss,
                time_consumed)
            epoch_CSV_report = '{},{:.10f},{:.10f},{:.2f}'.format(
                epoch, epoch_vae_train_loss, epoch_disc_train_loss,
                time_consumed)
            readme_writer.write(epoch_README_report + '\n\n')
            csv_writer.write(epoch_CSV_report + '\n')
            print('[STATE]: ' + epoch_README_report)

        #######################################################################
        ## End of training: save parameters and validation images
        #######################################################################
        # Nothing is left to write to the CSV file
        csv_writer.close()

        try:
            vae_net.save_parameters(vae_parameters_path)
        except Exception:
            print(
                '[ERROR]: VAE parameters path is not valid; parameters will be saved to main directory'
            )
            vae_net.save_parameters('./recent_model.params')

        # Generate up to n_validations images from the first test samples
        n_validations = 10
        img_arrays = vae_net.generate(
            test_features[0:n_validations].as_in_context(CTX)).asnumpy()
        # Loop over what was actually generated, so fewer than
        # n_validations test samples does not raise an IndexError.
        for i, raw_image in enumerate(img_arrays):
            # Reference the image from the README report
            readme_writer.write('![' + str(i) + '](./' + str(i) + '.png)')

            # NOTE(review): reshape does not permute axes. If generate()
            # returns channel-first data a transpose is needed here;
            # confirm the output layout of vae_net.generate.
            img_array = raw_image.reshape(
                (vae_net.out_width, vae_net.out_height, vae_net.n_channels))

            plt.imshow(img_array)
            try:
                plt.savefig(test_results_dir + str(i) + '.png')
                print('[STATE]: ' + test_results_dir + str(i) + '.png' +
                      ' saved')
            except Exception:
                print(
                    '[ERROR]: test results directory not valid, saving images to main directory'
                )
                plt.savefig('./' + str(i) + '.png')
            plt.close()
    finally:
        # Close both writers even if training or validation failed;
        # closing a file that is already closed is a no-op.
        for writer in (csv_writer, readme_writer):
            if writer is not None:
                writer.close()