def init_hidden(self, batch_size):
    # NOTE: LSTM needs 2 hidden states (h_0 and c_0)
    hidden = [
        Variable(torch.zeros(self.n_layers, batch_size, self.d_inner_hid)),
        Variable(torch.zeros(self.n_layers, batch_size, self.d_inner_hid))
    ]
    hidden[0] = check_cuda(hidden[0], self.use_cuda)
    hidden[1] = check_cuda(hidden[1], self.use_cuda)
    return hidden
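None of these excerpts includes the `check_cuda` helper itself. Judging from this call pattern (an object plus a `use_cuda` flag, returning the possibly-moved object), a minimal sketch could look like the following; the exact name and behavior are assumptions:

import torch

def check_cuda(obj, use_cuda):
    # Hypothetical sketch: move a tensor, Variable, or nn.Module to the
    # GPU only when the caller asked for it and CUDA is available.
    if use_cuda and torch.cuda.is_available():
        return obj.cuda()
    return obj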
def get_batch_label(data, label, index, batch_size, testing=False):
    tensor = torch.from_numpy(data[index:index + batch_size]).type(
        torch.LongTensor)
    input_data = Variable(tensor, volatile=testing, requires_grad=False)
    input_data = check_cuda(input_data, use_cuda)
    label_tensor = torch.from_numpy(label[index:index + batch_size]).type(
        torch.LongTensor)
    output_data = Variable(label_tensor, volatile=testing, requires_grad=False)
    output_data = check_cuda(output_data, use_cuda)
    return input_data, output_data
def _sample_latent(self, enc_hidden):
    mu = self._enc_mu(enc_hidden)
    log_sigma = self._enc_log_sigma(enc_hidden)
    sigma = torch.exp(log_sigma)
    std_z = torch.from_numpy(
        np.random.normal(0, 1, size=sigma.size())).float()
    self.z_mean = mu
    self.z_sigma = sigma
    std_z_var = Variable(std_z, requires_grad=False)
    std_z_var = check_cuda(std_z_var, self.use_cuda)
    # Reparameterization trick: z = mu + sigma * eps with eps ~ N(0, 1),
    # which keeps the sample differentiable w.r.t. mu and sigma.
    return mu + sigma * std_z_var
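The `z_mean` and `z_sigma` cached here feed the `latent_loss` calls in the training loops below, whose definition is also missing from this collection. For a standard Gaussian prior it is conventionally the KL divergence KL(N(mu, sigma^2) || N(0, 1)); a sketch under that assumption:

import torch

def latent_loss(z_mean, z_sigma):
    # Hypothetical sketch of the missing helper: closed-form KL divergence
    # KL(N(mu, sigma^2) || N(0, 1)) = 0.5 * (mu^2 + sigma^2 - log(sigma^2) - 1),
    # averaged over the batch.
    mean_sq = z_mean * z_mean
    sigma_sq = z_sigma * z_sigma
    return 0.5 * torch.mean(mean_sq + sigma_sq - torch.log(sigma_sq) - 1)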
def forward(self, input_sentence, is_softmax=False, dont_pass_emb=False):
    if dont_pass_emb:
        emb_sentence = input_sentence
    else:
        emb_sentence = self.src_word_emb(input_sentence)
    relu1 = F.relu(self.conv1(emb_sentence))
    layer1 = F.max_pool1d(relu1, 3)
    relu2 = F.relu(self.conv2(layer1))
    layer2 = F.max_pool1d(relu2, 3)
    # conv2 is applied a second time here, so its input and output channel
    # counts must match for this to work.
    layer3 = F.max_pool1d(F.relu(self.conv2(layer2)), 10)
    # The original flattened layer2 here, which left layer3 unused;
    # flattening layer3 is almost certainly what was intended.
    flatten = self.drop(layer3.view(layer3.size(0), -1))
    # Build the classifier lazily, once the flattened size is known.
    if not hasattr(self, 'linear'):
        self.linear = nn.Linear(flatten.size(1), 2)
        self.linear = check_cuda(self.linear, self.use_cuda)
    logit = self.linear(flatten)
    if is_softmax:
        logit = self.softmax(logit)
    return logit
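Building `self.linear` lazily inside `forward` works, but PyTorch 1.8+ provides `nn.LazyLinear`, which infers `in_features` on the first call; a hedged alternative for the constructor, assuming the rest of the module stays the same:

# In __init__, replacing the hasattr check inside forward:
self.linear = nn.LazyLinear(2)  # in_features inferred on first forward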
def main(): """! @brief Main function for predicting the image class(es) using a trained model. """ args = parse_arguments() test_on_gpu = (args.gpu and check_cuda()) cat_to_name = get_label_mapping(input_json=args.cat_to_name) # Load checkpoint model, ckpt_dict = load_checkpoint(args.checkpoint, train_on_gpu=test_on_gpu) idx_to_class = { idx: cat_to_name[c] for c, idx in ckpt_dict['class_to_idx'].items() } # Pre-process image image = process_image(args.image_path) image = torch.unsqueeze(image, 0) if test_on_gpu: image = image.cuda() # Get actual label label = os.path.basename(os.path.dirname(args.image_path)) label = cat_to_name[label] # Predictions - top K classes prob_k, ind_k = predict(image, model, topk=args.top_k) classes_k = map_classes(ind_k, idx_to_class) print("True label: '{}'".format(label)) print("") print_results(classes_k, prob_k) # Plot image and predictions if args.plot: fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(5, 10)) imshow(torch.squeeze(image.cpu()), ax=ax1, title=label) plot_predictions(prob_k, classes_k, ax=ax2, topk=args.top_k) plt.show()
def test(net, test_dataset):
    """
    Test the model on test data, and print statistics
    :param net: nn.Module
    :param test_dataset: torch.utils.data.Dataset
    :return:
    """
    logging.info("Started predicting testing data...")
    computing_device, extra = check_cuda()
    test_loader = DataLoader(test_dataset,
                             batch_size=SETTINGS["BATCH_SIZE"],
                             shuffle=False)
    with torch.no_grad():
        net.eval()
        all_predictions = []
        all_labels = []
        for images, labels in test_loader:  # Remember they come in batches
            images, labels = images.to(computing_device), labels.to(
                computing_device)
            # Since we are not going through criterion, we must apply
            # softmax ourselves
            outputs = func.softmax(net(images), dim=1)
            _, predicted = torch.max(outputs.data, 1)
            predicted = func.one_hot(
                predicted,
                num_classes=SETTINGS['NUM_CLASSES']).type(torch.FloatTensor)
            labels = func.one_hot(
                labels,
                num_classes=SETTINGS['NUM_CLASSES']).type(torch.FloatTensor)
            all_predictions.append(predicted)
            all_labels.append(labels)
        all_predictions = torch.cat(all_predictions)
        all_labels = torch.cat(all_labels)
    logging.info("Evaluating test results...")
    evaluate(all_predictions, all_labels, net, SETTINGS)
    sklearn_acc_per_class(all_labels, all_predictions)
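Note that `check_cuda()` here is a different, zero-argument variant returning a computing device plus extra loader settings. A plausible sketch, with the exact contents of `extra` being an assumption:

import torch

def check_cuda():
    # Hypothetical sketch: pick the computing device and matching
    # DataLoader keyword arguments.
    if torch.cuda.is_available():
        return torch.device("cuda"), {"num_workers": 4, "pin_memory": True}
    return torch.device("cpu"), {}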
# Make instances
encoder = Encoder(
    n_src_vocab=max_features,
    use_cuda=use_cuda,
)
decoder = Generator(
    n_target_vocab=max_features,
    c_dim=c_dim,
    use_cuda=use_cuda,
)
discriminator = Discriminator(
    n_src_vocab=max_features,
    maxlen=maxlen,
    use_cuda=use_cuda,
)
encoder = check_cuda(encoder, use_cuda)
decoder = check_cuda(decoder, use_cuda)
discriminator = check_cuda(discriminator, use_cuda)

criterion = torch.nn.CrossEntropyLoss()
vae_parameters = list(encoder.parameters()) + list(decoder.parameters())
vae_opt = Adam(vae_parameters)
e_opt = Adam(encoder.parameters())
g_opt = Adam(decoder.parameters())
d_opt = Adam(discriminator.parameters())


def train_discriminator(discriminator):
    # TODO: empirical Shannon entropy
    print_epoch = 0
    for epoch_index in range(epoch):
        for batch, index in enumerate(range(0, len(x_train) - 1, batch_size)):
            ...  # (loop body truncated in the original excerpt)
def main(): """! @brief Main function for model training and evaluation. """ args = parse_arguments() loader_tr, loader_val, class2idx = \ load_data(args.input, batch_size=args.batch_size, n_workers=args.n_workers) train_on_gpu = (args.gpu and check_cuda()) num_classes = len(class2idx) if args.checkpoint: print("Loading model checkpoint...") model, ckpt_dict = load_checkpoint(args.checkpoint, train_on_gpu=train_on_gpu) loss_val_min = ckpt_dict['loss'] epoch1 = ckpt_dict['epoch'] + 1 args.pretrained = ckpt_dict['pretrained'] class2idx = ckpt_dict['class_to_idx'] else: epoch1 = 0 # Build model architecture model = build_model(num_classes, pretrained=args.pretrained, train_on_gpu=train_on_gpu) loss_val_min = np.Inf # Specify optimizer and learning rate if args.pretrained[:6] == "resnet": params = model.parameters() else: params = model.classifier.parameters() optimizer = optim.SGD(params, lr=args.learning_rate, momentum=0.9) if args.checkpoint: optimizer.load_state_dict(ckpt_dict['optimizer_state']) # Specify loss function (categorical cross-entropy) criterion = nn.CrossEntropyLoss() # Train model for epoch in range(epoch1, args.n_epochs): loss_tr = train_model(model, loader_tr, criterion, optimizer, train_on_gpu=train_on_gpu) loss_val, acc_val = evaluate_model(model, loader_val, criterion, n_classes=num_classes, train_on_gpu=train_on_gpu) # Print training/validation statistics print('Epoch: {} \tTraining Loss: {:.6f} ' '\tValidation Loss: {:.6f}' '\tValidation Accuracy: {:.2f}' .format(epoch + 1, loss_tr, loss_val, acc_val)) # Save model if validation loss has decreased if loss_val <= loss_val_min: print('Validation loss decreased ({:.6f} --> {:.6f}). ' 'Saving model ...'.format(loss_val_min, loss_val)) save_checkpoint(args.output, model, optimizer, loss_val, epoch, class2idx, pretrained=args.pretrained) loss_val_min = loss_val
def train(dataset):
    computing_device, extra = check_cuda()
    # Save all k models so they can be compared afterwards
    nnets = []
    batch_size = SETTINGS['BATCH_SIZE']

    # Get lists of train/validation splits for k folds
    if SETTINGS['K-FOLD']:
        indices = get_k_fold_indecies(dataset, SETTINGS['RANDOM_SEED'],
                                      k=SETTINGS['K-FOLD-NUMBER'])
    else:
        indices = list(range(len(dataset)))
        validation_split = .1
        split = int(np.floor(validation_split * len(dataset)))
        np.random.seed(SETTINGS['RANDOM_SEED'])
        np.random.shuffle(indices)
        train_indices, val_indices = indices[split:], indices[:split]
        indices = [(train_indices, val_indices)]

    for k, (train_indices, val_indices) in enumerate(indices):
        logging.info("#" * 20)
        logging.info("Training Model {}".format(k))

        # Load data for this fold
        train_sampler = SubsetRandomSampler(train_indices)
        valid_sampler = SubsetRandomSampler(val_indices)
        train_loader = DataLoader(dataset, batch_size=batch_size,
                                  sampler=train_sampler, num_workers=10)
        validation_loader = DataLoader(dataset, batch_size=batch_size,
                                       sampler=valid_sampler, num_workers=10)

        # Initialize CNN
        if SETTINGS['NNET'] is None:
            net = models.resnet152(pretrained=True)
            # Freeze parameters so gradients are not computed for them
            for param in net.parameters():
                param.requires_grad = False
            net.fc = nn.Linear(net.fc.in_features, SETTINGS['NUM_CLASSES'])
            net = net.to(computing_device)
            net.train_epoch_losses = []
            net.val_epoch_losses = []
        else:
            net = SETTINGS['NNET'](
                SETTINGS['NUM_CLASSES']).to(computing_device)
            net.apply(weights_init)

        # Initialize optimizer and criterion
        if SETTINGS["WLOSS"]:
            criterion = nn.CrossEntropyLoss(
                weight=dataset.get_class_weights().to(computing_device))
        else:
            criterion = nn.CrossEntropyLoss()

        parameters_to_learn = []
        if SETTINGS['NNET'] is None:
            for name, param in net.named_parameters():
                if param.requires_grad:
                    parameters_to_learn.append(param)
        else:
            parameters_to_learn = net.parameters()
        optimizer = optim.Adam(parameters_to_learn, lr=SETTINGS["LR"],
                               weight_decay=SETTINGS["DECAY"])

        # Fit and save model to file
        if SETTINGS['K-FOLD']:
            save_path = "./{}_model{}_{}.pth".format(net.__class__.__name__,
                                                     k, TIME)
        else:
            save_path = "./{}_model_{}.pth".format(net.__class__.__name__,
                                                   TIME)
        fit_model(computing_device, net, criterion, optimizer, train_loader,
                  validation_loader, save_path=save_path)
        nnets.append(net)

    # Keep the model with the lowest validation loss across folds
    best_net = None
    for nnet in nnets:
        if best_net is None or min(nnet.val_epoch_losses) < min(
                best_net.val_epoch_losses):
            best_net = nnet
    return best_net
def init_hidden_c_for_lstm(self, batch_size):
    hidden = Variable(
        torch.zeros(self.n_layers, batch_size, self.d_inner_hid))
    hidden = check_cuda(hidden, self.use_cuda)
    return hidden
def interact(args):
    # (Function header reconstructed; the excerpt begins mid-function, and
    # `place` and `task` are assumed to be set up earlier in the original.)
    model = models.create_model(args, place)
    Example = namedtuple("Example", ["src", "data_id"])
    context = []
    start_info = ("Enter [EXIT] to quit the interaction, "
                  "[NEXT] to start a new conversation.")
    cprint(start_info, "yellow", attrs=["bold"])
    while True:
        user_utt = input(colored("[Human]: ", "red", attrs=["bold"])).strip()
        if user_utt == "[EXIT]":
            break
        elif user_utt == "[NEXT]":
            context = []
            cprint(start_info, "yellow", attrs=["bold"])
        else:
            context.append(user_utt)
            example = Example(src=" [SEP] ".join(context), data_id=0)
            record = task.reader._convert_example_to_record(example,
                                                            is_infer=True)
            data = task.reader._pad_batch_records([record], is_infer=True)
            pred = task.infer_step(model, data)[0]
            bot_response = pred["response"]
            print(colored("[Bot]:", "blue", attrs=["bold"]),
                  colored(bot_response, attrs=["bold"]))
            context.append(bot_response)
    return


if __name__ == "__main__":
    args = setup_args()
    check_cuda(True)
    interact(args)
def train_vae(encoder, decoder):
    encoder.train()
    decoder.train()
    for epoch_index in range(epoch):
        for batch, index in enumerate(range(0, len(x_train) - 1, batch_size)):
            total_loss = 0
            input_data, output_data = get_batch(x_train, index, batch_size)
            encoder.zero_grad()
            decoder.zero_grad()
            vae_opt.zero_grad()

            # The final slice may not contain a full batch, so init
            # hidden with len(input_data) instead of batch_size
            enc_hidden = encoder.init_hidden(len(input_data))
            # Input of encoder is a batch of sequences.
            enc_hidden = encoder(input_data, enc_hidden)

            # Generate a random one-hot array from the prior p(c)
            # NOTE: Assume a uniform distribution for now
            random_one_dim = np.random.randint(c_dim, size=len(input_data))
            one_hot_array = np.zeros((len(input_data), c_dim))
            one_hot_array[np.arange(len(input_data)), random_one_dim] = 1
            c = torch.from_numpy(one_hot_array).float()
            var_c = Variable(c, requires_grad=False)
            var_c = check_cuda(var_c, use_cuda)

            # TODO: use iteration along first dim.
            cat_hidden = (torch.cat([enc_hidden[0][0], var_c],
                                    dim=1).unsqueeze(0),
                          torch.cat([
                              decoder.init_hidden_c_for_lstm(
                                  len(input_data))[0], var_c
                          ], dim=1).unsqueeze(0))

            # Reshape output_data from (batch_size, seq_len) to
            # (seq_len, batch_size)
            output_data = output_data.permute(1, 0)

            # The decoder consumes the batch word by word. (The loop
            # variable is named `step` to avoid shadowing the batch
            # offset `index` above.)
            for step, word in enumerate(output_data):
                if step == len(output_data) - 1:
                    break
                output, cat_hidden = decoder(word, cat_hidden)
                next_word = output_data[step + 1]
                total_loss += criterion(output.view(-1, max_features),
                                        next_word)

            # Train
            avg_loss = total_loss.data[0] / maxlen
            ll = latent_loss(encoder.z_mean, encoder.z_sigma)
            total_loss += ll
            total_loss.backward()
            vae_opt.step()

            if batch % 25 == 0:
                print("[VAE] Epoch {} batch {}'s average language loss: {}, "
                      "latent loss: {}".format(epoch_index, batch, avg_loss,
                                               ll.data[0]))
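`get_batch` is not defined in any of these excerpts. By analogy with `get_batch_label` above, and since an autoencoder reconstructs its own input, a hedged sketch might slice the same array for both input and target:

def get_batch(data, index, batch_size, testing=False):
    # Hypothetical sketch: for a (variational) autoencoder the input and
    # the reconstruction target are the same slice of the corpus.
    tensor = torch.from_numpy(data[index:index + batch_size]).type(
        torch.LongTensor)
    batch = Variable(tensor, volatile=testing, requires_grad=False)
    batch = check_cuda(batch, use_cuda)
    return batch, batch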
def train_vae_with_attr_loss(encoder, decoder, discriminator):
    for epoch_index in range(epoch):
        for batch, index in enumerate(range(0, len(x_train) - 1, batch_size)):
            encoder.zero_grad()
            decoder.zero_grad()
            e_opt.zero_grad()
            g_opt.zero_grad()
            vae_loss = 0
            ll = 0
            input_data, output_data = get_batch_label(x_train, y_train,
                                                      index, batch_size)
            enc_hidden = encoder.init_hidden(len(input_data))
            enc_hidden = encoder(input_data, enc_hidden)

            target = np.array([output_data.cpu().data.numpy()]).reshape(-1)
            one_hot_array = np.eye(c_dim)[target]
            c = torch.from_numpy(one_hot_array).float()
            var_c = Variable(c, requires_grad=False)
            var_c = check_cuda(var_c, use_cuda)

            # TODO: use iteration along first dim.
            cat_hidden = (torch.cat([enc_hidden[0][0], var_c],
                                    dim=1).unsqueeze(0),
                          torch.cat([
                              decoder.init_hidden_c_for_lstm(
                                  len(input_data))[0], var_c
                          ], dim=1).unsqueeze(0))

            batch_init_word = np.zeros((batch_size, max_features))
            batch_init_word[np.arange(batch_size), Constants.BOS] = 1
            batch_init_word = Variable(torch.from_numpy(batch_init_word),
                                       requires_grad=False).float()
            batch_init_word = check_cuda(batch_init_word, use_cuda)

            input_data = input_data.permute(1, 0)
            # Reset next_word for every batch. (The original tested
            # `'next_word' in locals()`, which leaks the value from the
            # previous batch so later batches never start from BOS.)
            next_word = None
            for step in range(maxlen - 1):
                if next_word is not None:
                    word = next_word.squeeze(1)
                else:
                    word = batch_init_word
                word = check_cuda(word, use_cuda)
                output, cat_hidden, pre_soft = decoder(word, cat_hidden,
                                                       low_temp=True,
                                                       one_hot_input=True)
                # From one-hot to word embedding
                next_word = output
                correct_word = input_data[step + 1]
                vae_loss += criterion(pre_soft.view(-1, max_features),
                                      correct_word)

                if len(batch_init_word.size()) == 2:
                    batch_init_word = batch_init_word.unsqueeze(1)
                if len(next_word.size()) == 2:
                    next_word = next_word.unsqueeze(1)
                batch_init_word = torch.cat([batch_init_word, next_word],
                                            dim=1)

            # NOTE Latent loss
            ll = latent_loss(encoder.z_mean, encoder.z_sigma)

            # NOTE L_attr_c loss
            generated_sentence = batch_init_word
            discriminator.eval()
            logit = discriminator(generated_sentence, dont_pass_emb=True)
            l_attr_c = criterion(logit, output_data)

            # NOTE L_attr_z loss
            encoder.eval()
            generated_sentence = decoder.one_hot_to_word_emb(
                generated_sentence)
            encoded_gen = encoder.init_hidden(len(generated_sentence))
            encoded_gen = encoder(generated_sentence, encoded_gen,
                                  dont_pass_emb=True)
            l_attr_z = latent_loss(encoder.z_mean, encoder.z_sigma)

            avg_loss = vae_loss.data[0] / maxlen
            total_vae_loss = vae_loss + ll
            extra_decoder_loss = lambda_c * l_attr_c + lambda_z * l_attr_z
            total_vae_loss.backward()
            #e_opt.step()
            #extra_decoder_loss.backward()
            #g_opt.step()
            vae_opt.step()

            if batch % 25 == 0:
                print("[Attr] Epoch {} batch {}'s average language loss: {}, "
                      "latent loss: {}".format(epoch_index, batch, avg_loss,
                                               ll.data[0]))
                print("l_attr_c loss: {}, l_attr_z loss: {}".format(
                    l_attr_c.data[0], l_attr_z.data[0]))
import torch
import torch.nn as nn

from utils import check_cuda

device = check_cuda()


## Credit https://github.com/fastai/fastai/blob/master/fastai/layers.py#L285
def trunc_normal_(x, mean: float = 0., std: float = 1.):
    "Truncated normal initialization."
    return x.normal_().fmod_(2).mul_(std).add_(mean)


def embedding(ni, nf, padding_idx=None):
    "Create an embedding layer."
    emb = nn.Embedding(ni, nf, padding_idx)
    # See https://arxiv.org/abs/1711.09160
    with torch.no_grad():
        trunc_normal_(emb.weight, std=0.01)
    return emb


class RecurLayer(nn.Module):
    """Multiple LSTM layers with skip input connections.
    Gives all layer outputs and states."""

    def __init__(self, dims=10, num_layers=1):
        super().__init__()
        self.num_layers = num_layers
        self.rnns = nn.ModuleList([nn.LSTM(dims, dims, batch_first=True)])
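A quick sanity check of the truncated-normal initialization above (vocabulary size and dimensions here are illustrative only): `normal_().fmod_(2)` bounds each draw to (-2, 2) before scaling, so every weight ends up strictly inside (-2 * std, 2 * std).

emb = embedding(1000, 64, padding_idx=0)  # 1000-word vocab, 64-dim vectors
assert emb.weight.abs().max().item() < 2 * 0.01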
def get_model_name():
    # (Header reconstructed from the call below; samplerate, droprate and
    # activname are assumed to come from the enclosing/module scope.)
    pcff_str = '' if samplerate <= 0.0 else f'pcff_s{samplerate}'
    drop_str = '' if droprate <= 0.0 else f'_p{droprate}'
    return f'dense_2_1024_{activname}{pcff_str}{drop_str}'.replace('.', '')


log_file = 'drop_{}_{}_{}_{}.log'.format(
    get_model_name(),
    '-'.join([str(i) for i in (train_batch, val_batch, test_batch)]),
    dataset_flavor, timestamp_run)
log_title = log_file[:-4]
logger = Log(log_dir + log_file)
logger.start(log_title)
logger.start_intercept()

# check cuda availability when needed
if use_cuda:
    check_cuda()

# set up mnist dataset image size
(c, h, w) = (1, 28, 28)
if dataset_flavor in AVAILABLE_FLAVORS:
    ((train_loader, val_loader, test_loader),
     (nb_train, nb_val, nb_test)) = get_mnist_dataloaders(
         data_dir, train_batch, val_batch, test_batch, train_val_split,
         use_cuda, dataset_flavor, keep_shape=False)
else:
    raise Exception('Unknown dataset: {}'.format(dataset_flavor))
print('dataset: {}, location: {}'.format(dataset_flavor, data_dir))
print('sample / batch number for training:  ', nb_train, len(train_loader))
print('sample / batch number for validation:', nb_val, len(val_loader))
def interact(args):
    # (Header reconstructed as in the interactive excerpt above; `model`
    # and `task` are assumed to be set up earlier in the original.)
    Example = namedtuple("Example", ["src", "data_id"])
    context = []
    start_info = ("Enter [EXIT] to quit the interaction, "
                  "[NEXT] to start a new conversation.")
    cprint(start_info, "yellow", attrs=["bold"])
    while True:
        user_utt = input(colored("[Human]: ", "red", attrs=["bold"])).strip()
        if user_utt == "[EXIT]":
            break
        elif user_utt == "[NEXT]":
            context = []
            cprint(start_info, "yellow", attrs=["bold"])
        else:
            context.append(user_utt)
            example = Example(src=" [SEP] ".join(context), data_id=0)
            record = task.reader._convert_example_to_record(example,
                                                            is_infer=True)
            data = task.reader._pad_batch_records([record], is_infer=True)
            pred = task.infer_step(model, data)[0]
            bot_response = pred["response"]
            print(colored("[Bot]:", "blue", attrs=["bold"]),
                  colored(bot_response, attrs=["bold"]))
            context.append(bot_response)
    return


if __name__ == "__main__":
    args = setup_args()
    check_cuda(False)
    interact(args)
train_pyreader.decorate_paddle_reader(train_reader)
valid_pyreader.decorate_paddle_reader(valid_reader)

train_with_pyreader(exe, train_prog, train_exe, train_pyreader,
                    train_fetch_list, train_metrics,
                    epochs=epochs,
                    log_interval=args.log_interval,
                    valid_interval=args.valid_interval,
                    save_dir=args.save_dir,
                    save_model_name=args.model_name,
                    enable_ce=args.enable_ce,
                    test_exe=valid_exe,
                    test_pyreader=valid_pyreader,
                    test_fetch_list=valid_fetch_list,
                    test_metrics=valid_metrics)


if __name__ == "__main__":
    args = parse_args()
    # check whether the installed paddle is compiled with GPU
    check_cuda(args.use_gpu)
    logger.info(args)

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    train(args)
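In these PaddlePaddle excerpts, `check_cuda` serves yet another purpose: validating the `use_gpu` flag against the installed framework build, per the comment above. A sketch of that variant, assuming the fluid-era API:

import sys
import paddle.fluid as fluid

def check_cuda(use_cuda):
    # Hypothetical sketch: abort early if GPU execution was requested
    # but the installed Paddle wheel was compiled without CUDA.
    if use_cuda and not fluid.is_compiled_with_cuda():
        print("use_gpu is set, but the installed PaddlePaddle has no CUDA "
              "support; re-run with use_gpu=False or install "
              "paddlepaddle-gpu.")
        sys.exit(1)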
print("Final test result:") fetch_list = [test_net["probs"].name, test_net["labels"].name] evaluate(test_exe, test_prog, test_pyreader, fetch_list, "test", True) # infer if args.do_infer: print("Final infer result:") fetch_list = [infer_net["probs"].name] infer(test_exe, test_prog, infer_pyreader, fetch_list, "infer") def get_cards(): num = 0 cards = os.environ.get('CUDA_VISIBLE_DEVICES', '') if cards != '': num = len(cards.split(",")) return num if __name__ == "__main__": args = PDConfig('config.json') args.build() #args.print_arguments() utils.check_cuda(args.use_cuda) main(args)