def main():
    """Train a new classifier head on a frozen pretrained VGG backbone.

    Reads the options returned by ``parse_args()``, builds the train / valid /
    test data loaders from the ``flowers`` directory, replaces the backbone's
    ``classifier`` with a small trainable head, trains it with ``train`` and
    stores the result with ``save_checkpoint``.

    Raises:
        ValueError: if ``args.arch`` is not one of the supported
            architectures.
    """
    args = parse_args()

    # Fail fast with a clear message. Previously an unsupported architecture
    # only surfaced as a NameError on `classifier`, after the pretrained
    # weights had already been loaded.
    supported_archs = ("vgg13", "vgg19")
    if args.arch not in supported_archs:
        raise ValueError(
            "Unsupported architecture %r; expected one of: %s"
            % (args.arch, ", ".join(supported_archs)))

    data_dir = "flowers"
    train_dir = data_dir + "/train"
    valid_dir = data_dir + "/valid"
    test_dir = data_dir + "/test"

    # The same normalisation is applied to every split.
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])

    # Training uses random augmentation; validation and testing share one
    # deterministic resize + centre-crop pipeline.
    training_transforms = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomRotation(30),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    evaluation_transforms = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])

    # Order matters: index 0 = train, 1 = valid, 2 = test.
    image_datasets = [
        ImageFolder(train_dir, transform=training_transforms),
        ImageFolder(valid_dir, transform=evaluation_transforms),
        ImageFolder(test_dir, transform=evaluation_transforms),
    ]
    dataloaders = [
        torch.utils.data.DataLoader(image_datasets[0], batch_size=64,
                                    shuffle=True),
        torch.utils.data.DataLoader(image_datasets[1], batch_size=64),
        torch.utils.data.DataLoader(image_datasets[2], batch_size=64),
    ]

    model = getattr(models, args.arch)(pretrained=True)

    # Freeze the pretrained weights; only the new head below is trained.
    for param in model.parameters():
        param.requires_grad = False

    # Both supported architectures get the same head, sized from the input
    # width of the first layer of the original classifier.
    feature_num = model.classifier[0].in_features
    classifier = nn.Sequential(OrderedDict([
        ("fc1", nn.Linear(feature_num, 1024)),
        ("drop", nn.Dropout(p=0.5)),
        ("relu", nn.ReLU()),
        ("fc2", nn.Linear(1024, 102)),
        ("output", nn.LogSoftmax(dim=1)),
    ]))
    model.classifier = classifier

    # The head already emits log-probabilities, so NLLLoss is the matching
    # criterion. CrossEntropyLoss would apply log-softmax a second time,
    # which is redundant (log-softmax of log-probabilities is the identity).
    criterion = nn.NLLLoss()
    optimizer = optim.SGD(model.classifier.parameters(),
                          lr=float(args.learning_rate))
    epochs = int(args.epochs)
    class_index = image_datasets[0].class_to_idx
    gpu = args.gpu

    train(model, criterion, optimizer, dataloaders, epochs, gpu)

    # Attach the class -> index mapping to the model before checkpointing.
    model.class_to_idx = class_index
    save_checkpoint(model, optimizer, args, classifier)
def build_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build the NMT model (encoder, optional retrieved-key encoder, decoder,
    generator) described by ``model_opt``.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        a NMTModel, with its ``generator`` attached and moved to the
        selected device.
    Raises:
        AssertionError: if the model type is unsupported, or if
            ``share_embeddings`` is set while the source and target
            vocabularies differ.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Only text models can have a retrieved-key encoder. Bind the name up
    # front so img/audio models do not hit a NameError when the NMTModel is
    # constructed below.
    rk_encoder = None

    # Build encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
        src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        encoder = build_encoder(model_opt, src_embeddings)
        if model_opt.use_retrieved_keys:
            rk_encoder = build_rk_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Build decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt, tgt_dict,
                                      feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    # NOTE(review): `src_dict` / `src_embeddings` only exist for text models;
    # share_embeddings with img/audio is presumably never requested - confirm.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build NMTModel(= encoder + decoder).
    device = torch.device("cuda" if gpu else "cpu")
    model = onmt.models.NMTModel(encoder, rk_encoder, decoder,
                                 rk_to_src_attn=model_opt.rk_to_src_attn)
    model.model_type = model_opt.model_type

    # Build Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder embedding matrix.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            # Glorot init only applies to weight matrices (dim > 1).
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        # Encoders without an `embeddings` attribute are skipped.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
def lastBlock(self, inF, outF, kernalSize, *args, **kwargs):
    """Return the final block: a 2-D convolution followed by log-softmax.

    ``inF``, ``outF`` and ``kernalSize`` are passed to ``nn.Conv2d`` as its
    first three positional arguments; any extra positional or keyword
    arguments are forwarded to it unchanged. The log-softmax is taken over
    dimension 1.
    """
    convolution = nn.Conv2d(inF, outF, kernalSize, *args, **kwargs)
    log_probabilities = nn.LogSoftmax(dim=1)
    return nn.Sequential(convolution, log_probabilities)
def __init__(self, hidden_size, output_size):
    """Create the linear projection and the log-softmax layer.

    Args:
        hidden_size: input feature count of the linear layer.
        output_size: output feature count of the linear layer.
    """
    super(Generator, self).__init__()
    projection = nn.Linear(in_features=hidden_size,
                           out_features=output_size)
    log_softmax = nn.LogSoftmax(dim=-1)  # normalises over the last dim
    self.output = projection
    self.softmax = log_softmax
def __init__(self):
    """Create the KL-divergence criterion and the log-softmax layer."""
    super(CriterionKLPixelWise, self).__init__()
    kl_criterion = nn.KLDivLoss(reduction='batchmean')
    log_softmax = nn.LogSoftmax(dim=1)  # taken over dimension 1
    self.klloss = kl_criterion
    self.logsoftmax = log_softmax
def train(model, train_loader, val_loader, optimizer, num_epochs,
          path_to_save_best_weights):
    """Train ``model`` and save the weights with the lowest validation loss.

    Every epoch runs a training pass over ``train_loader`` followed by a
    validation pass over ``val_loader``. Loss and mean IoU are logged to
    TensorBoard (``runs/unet``), together with a prediction / input / mask
    image strip every 10th batch. After the last epoch the best weights are
    written to ``path_to_save_best_weights``.

    Args:
        model: network mapping an image batch to per-class logits
            (class scores along dimension 1).
        train_loader: iterable of ``(imgs, masks)`` training batches.
        val_loader: iterable of ``(imgs, masks)`` validation batches.
        optimizer: optimizer stepping the parameters of ``model``.
        num_epochs: number of epochs to run.
        path_to_save_best_weights: directory for the saved ``.pth`` file.
    """
    # Local import: only needed to snapshot the best weights.
    import copy

    model.train()
    log_softmax = nn.LogSoftmax(dim=1)  # NLLLoss expects log-probabilities
    softmax = nn.Softmax(dim=1)
    criterion_nlloss = nn.NLLLoss()
    metrics_evaluator = PerformanceMetricsEvaluator()
    to_tensor = transforms.ToTensor()
    writer = SummaryWriter('runs/unet')
    since = time.time()

    # state_dict() returns references to the live parameter tensors, so the
    # snapshot must be deep-copied or later optimizer steps overwrite it.
    best_model_weights = copy.deepcopy(model.state_dict())
    best_IoU = 0.0
    best_val_loss = 1000000000
    curr_val_loss = 0.0
    curr_training_loss = 0.0
    curr_training_IoU = 0.0
    curr_val_IoU = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'val']:
            is_training = phase == 'train'
            if is_training:
                model.train()
                data_loader = train_loader
            else:
                model.eval()
                data_loader = val_loader

            running_loss = 0.0
            running_IoU = 0

            # Iterate over data.
            ind = 0
            for imgs, masks in tqdm(data_loader):
                imgs = imgs.to(device)
                masks = masks.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; autograd bookkeeping is only needed when training
                with torch.set_grad_enabled(is_training):
                    logits = model(imgs)
                    log_softmax_logits = log_softmax(logits)
                    loss = criterion_nlloss(log_softmax_logits, masks)

                # backward + optimize only if in training phase
                if is_training:
                    loss.backward()
                    optimizer.step()

                # Collapse the class dimension into a per-pixel label map.
                unet_softmax_collupsed = (
                    softmax(logits).detach().cpu().argmax(dim=1))

                # Tensorboard image logging, every 10th batch.
                # NOTE(review): repeat(3, 1, 1) below looks like it assumes a
                # batch size of 1 - confirm against the loaders.
                if ind % 10 == 0:
                    if phase == 'val':
                        img_name = 'ValidationEpoch: {}'.format(str(epoch))
                    else:
                        img_name = 'TrainingEpoch: {}'.format(str(epoch))

                    rgb_prediction = unet_softmax_collupsed.repeat(
                        3, 1, 1).float()
                    rgb_prediction = np.moveaxis(
                        rgb_prediction.numpy(), 0, -1)
                    converted_img = img_to_visible(rgb_prediction)
                    converted_img = torch.unsqueeze(
                        to_tensor(converted_img), 0)

                    masks_changed = masks.detach().cpu()
                    masks_changed = masks_changed.repeat(3, 1, 1).float()
                    masks_changed = np.moveaxis(
                        masks_changed.numpy(), 0, -1)
                    masks_changed = img_to_visible(masks_changed)
                    masks_changed = torch.unsqueeze(
                        to_tensor(masks_changed), 0)

                    # prediction | input | ground truth, joined on last axis
                    third_tensor = torch.cat(
                        (converted_img, imgs.detach().cpu(), masks_changed),
                        -1)
                    writer.add_image(img_name, third_tensor, epoch)

                # statistics (IoU uses the first sample of the batch only)
                running_loss += loss.detach().item()
                running_IoU += metrics_evaluator.mean_IU(
                    unet_softmax_collupsed.numpy()[0],
                    masks.cpu().numpy()[0])
                ind += 1

            epoch_loss = running_loss / len(data_loader)
            epoch_IoU = running_IoU / len(data_loader)
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss,
                                                       epoch_IoU))

            # deep copy the model
            if phase == 'val' and epoch_loss < best_val_loss:  # TODO add IoU
                best_val_loss = epoch_loss
                best_IoU = epoch_IoU
                best_model_weights = copy.deepcopy(model.state_dict())

            if phase == 'val':
                curr_val_loss = epoch_loss
                curr_val_IoU = epoch_IoU
            else:
                curr_training_loss = epoch_loss
                curr_training_IoU = epoch_IoU

        writer.add_scalars('TrainValIoU', {
            'trainIoU': curr_training_IoU,
            'validationIoU': curr_val_IoU
        }, epoch)
        writer.add_scalars('TrainValLoss', {
            'trainLoss': curr_training_loss,
            'validationLoss': curr_val_loss
        }, epoch)

    # Saving best model
    torch.save(
        best_model_weights,
        os.path.join(path_to_save_best_weights,
                     'unet{:2f}.pth'.format(best_val_loss)))

    # Flush pending TensorBoard events and release the file handle.
    writer.close()

    # Show the timing and final statistics
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Loss: {:4f}'.format(best_val_loss))  # TODO add IoU
def test_logsoftmax(self):
    """Pass ``nn.LogSoftmax(dim=3)`` and a random 1x2x3x4 input to
    ``self.assertONNX``."""
    sample_input = torch.randn(1, 2, 3, 4, requires_grad=True)
    module_under_test = nn.LogSoftmax(dim=3)
    self.assertONNX(module_under_test, sample_input)
def __init__(self):
    """Create the two linear layers (80 -> 40 -> 10) and the log-softmax
    over the last dimension."""
    super(Net_Head, self).__init__()
    self.fc2 = nn.Linear(in_features=80, out_features=40)
    self.fc3 = nn.Linear(in_features=40, out_features=10)
    self.logsoftmax = nn.LogSoftmax(dim=-1)
def __init__(self, input_dim, nclass):
    """Create a single linear layer, a log-softmax and an NLL criterion.

    Args:
        input_dim: input feature count of the linear layer.
        nclass: output feature count of the linear layer.
    """
    super(LINEAR_LOGSOFTMAX, self).__init__()
    self.fc = nn.Linear(in_features=input_dim, out_features=nclass)
    self.logic = nn.LogSoftmax(dim=1)  # log-probabilities over dim 1
    self.lossfunction = nn.NLLLoss()
def make_base_model(model_opt, fields, gpu, checkpoint=None, train_part="all"):
    """Build the context-aware NMT model described by ``model_opt``.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
        train_part: when ``"context"`` and a checkpoint is given, the
                    checkpoint weights are merged into the freshly built
                    model, every parameter is frozen, and only
                    ``model.doc_context`` is unfrozen (and re-initialised
                    according to ``param_init`` / ``param_init_glorot``).
    Returns:
        the NMTModel.
    """
    assert model_opt.model_type in ["text", "img", "audio"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.io.collect_feature_vocabs(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    elif model_opt.model_type == "img":
        encoder = ImageEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)
    elif model_opt.model_type == "audio":
        encoder = AudioEncoder(model_opt.enc_layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout,
                               model_opt.sample_rate,
                               model_opt.window_size)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    feature_dicts = onmt.io.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)
    context = make_context(model_opt, tgt_dict)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder, context,
                     context_type=model_opt.context_type)
    model.model_type = model_opt.model_type

    # Make Generator.
    if not model_opt.copy_attn:
        # Explicit dim: the implicit-dim form of LogSoftmax is deprecated.
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model_dict = checkpoint['model']
        if train_part == "context":
            # Start from the fresh model's state and overlay the checkpoint
            # entries that are to be reused.
            model_dict = model.state_dict()
            if 'join' in model_opt.context_type:
                pretrained_dict = {}
                for k, v in checkpoint['model'].items():
                    if k in model_dict:
                        if 'doc_context' in k:
                            k = k.replace('doc_context', 'doc_context.0')
                        pretrained_dict[k] = v
            else:
                pretrained_dict = {
                    k: v for k, v in checkpoint['model'].items()
                    if k in model_dict and 'doc_context' not in k}
            model_dict.update(pretrained_dict)

        model.load_state_dict(model_dict, strict=False)
        generator.load_state_dict(checkpoint['generator'])

        if train_part == "context":
            # The attribute autograd reads is `requires_grad`; the previous
            # `require_grad` spelling only created an unused attribute, so
            # nothing was actually frozen.
            print("Freezing parameters of main model")
            for param in model.parameters():
                param.requires_grad = False
            for param in generator.parameters():
                param.requires_grad = False
            print("Unfreezing parameters of context")
            for param in model.doc_context.parameters():
                param.requires_grad = True
                if model_opt.param_init != 0.0:
                    param.data.uniform_(-model_opt.param_init,
                                        model_opt.param_init)
                if model_opt.param_init_glorot:
                    if param.dim() > 1:
                        xavier_uniform(param)
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        if model_opt.param_init_glorot:
            # Glorot init only applies to weight matrices (dim > 1).
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform(p)

        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def main():
    """Parse the CLI options and build a pretrained network with a new
    classifier head.

    Validates the data and save directories, loads the category names,
    builds the training data loader, loads a pretrained VGG or DenseNet
    model, freezes its parameters and replaces its ``classifier`` with a
    fully connected head sized from ``--hidden_units``.

    Exits with status 1 when the data directory is missing or the requested
    architecture is neither VGG nor DenseNet.
    """
    parser = train_args.get_args()
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s ' + __version__ + ' by ' + __author__)
    cli_args = parser.parse_args()

    # First check the data directory ...
    if not os.path.isdir(cli_args.data_directory):
        print(f'Data directory {cli_args.data_directory} not found.')
        raise SystemExit(1)

    # ... then make sure the save directory exists.
    if not os.path.isdir(cli_args.save_dir):
        print(f'Directory {cli_args.save_dir} does not exist. Creating...')
        os.makedirs(cli_args.save_dir)

    with open(cli_args.categories_json, 'r') as f:
        cat_to_name = json.load(f)

    # One output unit per category in the JSON file.
    output_size = len(cat_to_name)

    expected_means = [0.485, 0.456, 0.406]
    expected_std = [0.229, 0.224, 0.225]
    max_image_size = 224
    batch_size = 32

    # Training transform: random augmentation, then tensor + normalisation.
    tr_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(p=0.25),
        transforms.RandomRotation(25),
        transforms.RandomGrayscale(p=0.02),
        transforms.RandomResizedCrop(max_image_size),
        transforms.ToTensor(),
        transforms.Normalize(expected_means, expected_std),
    ])
    tr_dataset = datasets.ImageFolder(cli_args.data_directory,
                                      transform=tr_transform)
    tr_dataloader = torch.utils.data.DataLoader(tr_dataset,
                                                batch_size=batch_size,
                                                shuffle=True)

    # Model selection: only VGG and DenseNet families are handled.
    if not cli_args.arch.startswith(("vgg", "densenet")):
        print("Only supporting VGG and DenseNet")
        raise SystemExit(1)

    print(f"Using a pre-trained {cli_args.arch} network.")
    my_model = models.__dict__[cli_args.arch](pretrained=True)

    # Classifier input width for each supported DenseNet variant.
    densenet_input = {
        'densenet121': 1024,
        'densenet169': 1664,
        'densenet161': 2208,
        'densenet201': 1920
    }

    input_size = 0
    if cli_args.arch.startswith("vgg"):
        input_size = my_model.classifier[0].in_features
    if cli_args.arch.startswith("densenet"):
        input_size = densenet_input[cli_args.arch]

    # Freeze the pretrained parameters; the new head is created afterwards
    # and therefore stays trainable.
    for param in my_model.parameters():
        param.requires_grad = False

    od = OrderedDict()

    # NOTE(review): this aliases cli_args.hidden_units, so the insert below
    # also prepends the input size to the parsed argument. Kept as-is in case
    # code outside this view relies on it - confirm.
    hidden_sizes = cli_args.hidden_units
    hidden_sizes.insert(0, input_size)

    # Entry 0 is the input width, so it is not counted as a hidden layer.
    print(f"Building a {len(hidden_sizes) - 1} hidden layer classifier with inputs {cli_args.hidden_units}")

    layer_widths = zip(hidden_sizes, hidden_sizes[1:])
    for layer_no, (n_in, n_out) in enumerate(layer_widths, start=1):
        od['fc' + str(layer_no)] = nn.Linear(n_in, n_out)
        od['relu' + str(layer_no)] = nn.ReLU()
        od['dropout' + str(layer_no)] = nn.Dropout(p=0.15)

    # Use the last width directly instead of the leaked loop index, which is
    # undefined when no hidden units were requested.
    od['output'] = nn.Linear(hidden_sizes[-1], output_size)
    od['softmax'] = nn.LogSoftmax(dim=1)

    classifier = nn.Sequential(od)

    # Replace the classifier
    my_model.classifier = classifier
    my_model.zero_grad()
# Wrap the training and validation sets in shuffled loaders of 64 samples.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
valloader = torch.utils.data.DataLoader(valset, batch_size=64, shuffle=True)

# Define the model dimensions.
input_size = 784
hidden_sizes = [128, 64]
output_size = 10

# Build a feed-forward network: two hidden ReLU layers and a log-softmax
# output over dimension 1.
model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[1], output_size),
                      nn.LogSoftmax(dim=1))

# NLLLoss consumes the log-probabilities produced by the LogSoftmax layer.
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.003, momentum=0.9)
time0 = time()  # start time, taken before the first epoch
epochs = 15
for e in range(epochs):
    running_loss = 0
    img = 0
    for images, labels in trainloader:
        # Flatten MNIST images into a 784 long vector
        images = images.view(images.shape[0], -1)
        # Clean the gradients
def __init__(self, num_classes, dilation, in_channels=3, depth=5,
             start_filts=64, up_mode='transpose', merge_mode='concat'):
    """
    Arguments:
        num_classes: int, number of output channels of the final 1x1
            convolution.
        dilation: forwarded unchanged to every DownConv and UpConv block.
        in_channels: int, number of channels in the input tensor.
            Default is 3 for RGB images.
        depth: int, number of MaxPools in the U-Net.
        start_filts: int, number of convolutional filters for the
            first conv.
        up_mode: string, type of upconvolution. Choices: 'transpose'
            for transpose convolution or 'upsample' for nearest neighbour
            upsampling.
        merge_mode: string, how the up and down paths are merged.
            Choices: 'concat' or 'add'.

    Raises:
        ValueError: if ``up_mode`` or ``merge_mode`` is not an allowed
            value, or if the combination 'upsample' + 'add' is requested.
    """
    super(UNet, self).__init__()

    if up_mode in ('transpose', 'upsample'):
        self.up_mode = up_mode
    else:
        raise ValueError("\"{}\" is not a valid mode for "
                         "upsampling. Only \"transpose\" and "
                         "\"upsample\" are allowed.".format(up_mode))

    if merge_mode in ('concat', 'add'):
        self.merge_mode = merge_mode
    else:
        # Report the offending merge_mode (the message used to show up_mode
        # and was missing the space before "merging").
        raise ValueError("\"{}\" is not a valid mode for "
                         "merging up and down paths. "
                         "Only \"concat\" and "
                         "\"add\" are allowed.".format(merge_mode))

    # NOTE: up_mode 'upsample' is incompatible with merge_mode 'add'
    if self.up_mode == 'upsample' and self.merge_mode == 'add':
        raise ValueError("up_mode \"upsample\" is incompatible "
                         "with merge_mode \"add\" at the moment "
                         "because it doesn't make sense to use "
                         "nearest neighbour to reduce "
                         "depth channels (by half).")

    self.num_classes = num_classes
    self.in_channels = in_channels
    self.start_filts = start_filts
    self.depth = depth
    self.dilation = dilation

    # Plain lists while the blocks are collected; converted to ModuleList
    # further down so the blocks get registered as sub-modules.
    self.down_convs = []
    self.up_convs = []

    self.class_mious = torch.zeros(self.num_classes)
    self.class_samples = 0
    self.sm = nn.LogSoftmax(dim=1)
    self.ce_loss = nn.CrossEntropyLoss()
    self.dice_loss = DiceLoss()

    # create the encoder pathway and add to a list
    for i in range(depth):
        ins = self.in_channels if i == 0 else outs
        outs = self.start_filts * (2**i)
        # every encoder block pools except the last one
        pooling = i < depth - 1

        down_conv = DownConv(ins, outs, pooling=pooling,
                             dilation=self.dilation)
        self.down_convs.append(down_conv)

    # create the decoder pathway and add to a list
    # - careful! decoding only requires depth-1 blocks
    for i in range(depth - 1):
        ins = outs
        outs = ins // 2
        up_conv = UpConv(ins, outs, up_mode=up_mode,
                         merge_mode=merge_mode, dilation=self.dilation)
        self.up_convs.append(up_conv)

    self.conv_final = conv1x1(outs, self.num_classes)

    # add the list of modules to current module
    self.down_convs = nn.ModuleList(self.down_convs)
    self.up_convs = nn.ModuleList(self.up_convs)

    self.reset_params()
    self.save_hyperparameters()
def train(train_loaders, model, optimizer, scheduler, epoch):
    """Run one training pass over the curated and the noisy data.

    For every curated batch one noisy batch is drawn from ``noisy_itr``.
    The loss is the sum of the KL-divergence criterion on ``model(input)``
    and on ``model.noisy(input_noisy)``. The scheduler is stepped after
    every optimizer step.

    Args:
        train_loaders: pair ``(train_loader, noisy_itr)``; the first is
            iterated, the second is advanced with ``next()`` once per batch.
        model: network exposing ``__call__`` and a ``noisy`` head.
        optimizer: optimizer for ``model``.
        scheduler: learning-rate scheduler, stepped once per batch.
        epoch: current epoch index (not used inside this function).

    Returns:
        Tuple ``(kl, lwlrap, kl_noisy, lwlrap_noisy)``: the averaged KL
        losses and the weighted per-class lwlrap for both data sources.
    """
    train_loader, noisy_itr = train_loaders
    kl_avr = AverageMeter()
    kl_noisy_avr = AverageMeter()
    lsoftmax = nn.LogSoftmax(dim=1).cuda()
    softmax = nn.Softmax(dim=1).cuda()
    criterion_kl = nn.KLDivLoss().cuda()

    # switch to train mode
    model.train()

    # Per-batch results are collected in lists and concatenated once at the
    # end; concatenating inside the loop re-copies everything each batch.
    # Each list is seeded with an empty (0, NUM_CLASS) float32 array so the
    # result is well-formed even when the loader yields nothing.
    def _empty():
        return np.zeros([0, NUM_CLASS], np.float32)

    pred_chunks = [_empty()]
    true_chunks = [_empty()]
    pred_noisy_chunks = [_empty()]
    true_noisy_chunks = [_empty()]

    for inputs, target in train_loader:
        # get batches (tensors are used directly; the Variable wrapper is a
        # no-op on the PyTorch versions that provide `.item()`)
        inputs = inputs.cuda()
        target = target.cuda()

        input_noisy, target_noisy = next(noisy_itr)
        input_noisy = input_noisy.cuda()
        target_noisy = target_noisy.cuda()

        # compute output
        output = model(inputs)
        kl = criterion_kl(lsoftmax(output), target)
        output_noisy = model.noisy(input_noisy)
        kl_noisy = criterion_kl(lsoftmax(output_noisy), target_noisy)
        loss = kl + kl_noisy

        pred = softmax(output).data.cpu().numpy()
        pred_noisy = softmax(output_noisy).data.cpu().numpy()

        # backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # record log
        # NOTE(review): the noisy meter is weighted by the curated batch
        # size, not input_noisy.size(0) - confirm this is intended.
        kl_avr.update(kl.data, inputs.size(0))
        kl_noisy_avr.update(kl_noisy.data, inputs.size(0))
        pred_chunks.append(pred)
        true_chunks.append(target.data.cpu().numpy())
        pred_noisy_chunks.append(pred_noisy)
        true_noisy_chunks.append(target_noisy.data.cpu().numpy())

    preds = np.concatenate(pred_chunks)
    y_true = np.concatenate(true_chunks)
    preds_noisy = np.concatenate(pred_noisy_chunks)
    y_true_noisy = np.concatenate(true_noisy_chunks)

    # calc metric
    per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(
        y_true, preds)
    lwlrap = np.sum(per_class_lwlrap * weight_per_class)

    per_class_lwlrap, weight_per_class = calculate_per_class_lwlrap(
        y_true_noisy, preds_noisy)
    lwlrap_noisy = np.sum(per_class_lwlrap * weight_per_class)

    return kl_avr.avg.item(), lwlrap, kl_noisy_avr.avg.item(), lwlrap_noisy
# Download and load the test data testset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=False, transform=transform) testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True) image, label = next(iter(trainloader)) helper.imshow(image[0, :]) # Defining network architecture from torch import nn model = nn.Sequential(nn.Linear(784, 342), nn.ReLU(), nn.Linear(342, 172), nn.ReLU(), nn.Linear(172, 64), nn.ReLU(), nn.Linear(64, 10), nn.LogSoftmax(dim=1)) #Creating the network, defining the criterion and optimizer from torch import optim criterion = nn.NLLLoss() optimizer = optim.Adam(model.parameters(), lr=.003) #Training the network epochs = 5 for e in range(epochs): running_loss = 0 for images, labels in trainloader: images = images.view(images.shape[0], -1)
word_padding_idx=tgt_padding) from onmt.decoders.decoder import InputFeedRNNDecoder as InputFeedRNNDecoder decoder = InputFeedRNNDecoder(hidden_size=rnn_size, num_layers=1, bidirectional_encoder=True, rnn_type="LSTM", embeddings=decoder_embeddings) # from onmt.models.model import NMTModel as NMTModel model = onmt.models.model.NMTModel(encoder, decoder) # Specify the tgt word generator and loss computation module model.generator = nn.Sequential(nn.Linear(rnn_size, len(vocab["tgt"])), nn.LogSoftmax()) loss = onmt.utils.loss.NMTLossCompute(model.generator, vocab["tgt"]) # up the optimizer optim = onmt.utils.optimizers.Optim(method="sgd", learning_rate=1, max_grad_norm=2) optim.set_parameters(model.named_parameters()) # Load some data data = torch.load("../../data/data.train.pt") valid_data = torch.load("../../data/data.valid.pt") data.load_fields(vocab)
batch_size=64, shuffle=True) #return model from argument ref https://knowledge.udacity.com/questions/479950 Thank you Arun! model = getattr(models, arch)(pretrained=True) #Turn off gradients for param in model.parameters(): param.requires_grad = False #replace classifier classifier = nn.Sequential( OrderedDict([('fc1', nn.Linear(25088, hidden_units)), ('relu', nn.ReLU()), ('dropout', nn.Dropout(p=0.2)), ('fc2', nn.Linear(hidden_units, 102)), ('output', nn.LogSoftmax(dim=1))])) model.classifier = classifier criterion = nn.NLLLoss() optimizer = optim.Adam(model.classifier.parameters(), lr=lrn) model.to(device) print(model) # Prints out training loss, validation loss, and validation accuracy as the network trains steps = 0 running_loss = 0 print_every = 20 for epoch in range(epochs): for images, labels in training_dataloader: steps += 1
def make_base_model(model_opt, fields, gpu, checkpoint=None):
    """Build the NMT model (encoder, decoder, generator) from ``model_opt``.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
    Returns:
        the NMTModel, with its ``generator`` attached and placed on the GPU
        or CPU according to ``gpu``.
    """
    assert model_opt.model_type in ["text", "img"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Make encoder.
    if model_opt.model_type == "text":
        src_dict = fields["src"].vocab
        feature_dicts = onmt.IO.collect_feature_dicts(fields, 'src')
        src_embeddings = make_embeddings(model_opt, src_dict,
                                         feature_dicts)
        encoder = make_encoder(model_opt, src_embeddings)
    else:
        encoder = ImageEncoder(model_opt.layers,
                               model_opt.brnn,
                               model_opt.rnn_size,
                               model_opt.dropout)

    # Make decoder.
    tgt_dict = fields["tgt"].vocab
    # TODO: prepare for a future where tgt features are possible.
    feature_dicts = onmt.IO.collect_feature_dicts(fields, 'tgt')
    tgt_embeddings = make_embeddings(model_opt, tgt_dict,
                                     feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required
    # NOTE(review): `src_embeddings` only exists for text models - confirm
    # share_embeddings is never combined with model_type "img".
    if model_opt.share_embeddings:
        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = make_decoder(model_opt, tgt_embeddings)

    # Make NMTModel(= encoder + decoder).
    model = NMTModel(encoder, decoder)

    # Make Generator.
    if not model_opt.copy_attn:
        # Explicit dim: the implicit-dim form of LogSoftmax is deprecated.
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt, fields["src"].vocab,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        print('Loading model parameters.')
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        if model_opt.param_init != 0.0:
            print('Intializing model parameters.')
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)

        # Pretrained vectors only apply to components that own an
        # `embeddings` attribute; the guard avoids an AttributeError for
        # encoders built without one.
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator

    # Make the whole model leverage GPU if indicated to do so.
    if gpu:
        model.cuda()
    else:
        model.cpu()

    return model
def build_base_model(cls,
        src_types: List[str],
        model_opt,
        fields,
        gpu,
        checkpoint=None,
        gpu_id=None
):
    """Assemble a multi-source model (one encoder per source, one decoder,
    one generator) from the given options.

    Args:
        src_types: names of the sources; the field of each one is looked up
            as ``fields["src.<name>"]``.
        model_opt: the option loaded from checkpoint. It's important that
            the opts have been updated and validated.
            See :class:`onmt.utils.parse.ArgumentParser`.
        fields (dict[str, torchtext.data.Field]):
            `Field` objects for the model.
        gpu (bool): whether to use gpu.
        checkpoint: the model generated by train phase, or a resumed snapshot
                    model from a stopped training.
        gpu_id (int or NoneType): Which GPU to use.

    Returns:
        the model, with ``generator`` attached and moved to the device.
    """
    # Back compat: older options have no attention_dropout, fall back to
    # the plain dropout value.
    if not hasattr(model_opt, "attention_dropout"):
        model_opt.attention_dropout = model_opt.dropout

    # Indices of the type tokens in the "src.l" vocabulary.
    type_vocab = fields["src.l"][0][1].vocab
    type_tokens = (
        "out-std-logic",
        "out-std-logic-vector",
        "std-logic",
        "std-logic-vector",
        "inout-std-logic",
        "inout-std-logic-vector",
        "signed",
        "unsigned",
        "out-startaddr-array-type",
        "std-ulogic",
        "boolean",
        "<unk>",
        "<pad>",
    )
    type_token_dict = {token: type_vocab.stoi[token] for token in type_tokens}

    # Embeddings: every source except "type" gets its own.
    src_embs: Dict[str, Optional[nn.Module]] = {}
    for src_type in src_types:
        if src_type == "type":
            continue
        src_embs[src_type] = cls.build_embeddings(
            model_opt, fields[f"src.{src_type}"])

    # Encoders: "type" and "patype" sources have none.
    encoders: Dict[str, EncoderBase] = {}
    for src_type in src_types:
        if src_type in ("type", "patype"):
            continue
        encoders[src_type] = cls.build_encoder(
            model_opt, src_embs[src_type], src_type,
            type_indx=type_token_dict)

    # Decoder.
    tgt_field = fields["tgt"]
    tgt_emb = cls.build_embeddings(model_opt, tgt_field, for_encoder=False)

    # Embedding sharing is not available for this model.
    assert not model_opt.share_embeddings, "share embeddings not supported"

    decoder = cls.build_decoder(model_opt, tgt_emb)

    # The option object is extended in place with the two extra sources.
    model_opt.src_types.append("type")
    model_opt.src_types.append("patype")

    # Device selection.
    if not gpu:
        device = torch.device("cpu")
    elif gpu_id is not None:
        device = torch.device("cuda", gpu_id)
    else:
        device = torch.device("cuda")

    model = MultiSourceTypeAppendedModel(encoders, decoder)

    # Generator.
    if model_opt.copy_attn:
        tgt_base_field = fields["tgt"].base_field
        vocab_size = len(tgt_base_field.vocab)
        pad_idx = tgt_base_field.vocab.stoi[tgt_base_field.pad_token]
        generator = MultiSourceCopyGenerator(model_opt.dec_rnn_size,
                                             vocab_size, pad_idx)
    else:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        projection = nn.Linear(model_opt.dec_rnn_size,
                               len(fields["tgt"].base_field.vocab))
        generator = nn.Sequential(projection, Cast(torch.float32), gen_func)
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight

    # Restore the weights from the checkpoint, or initialise them.
    if checkpoint is not None:
        # Rename the parameters of the old custom layer norm so that
        # checkpoints written with it still load.
        def fix_key(s):
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        renamed_state = {}
        for key, value in checkpoint['model'].items():
            renamed_state[fix_key(key)] = value
        checkpoint['model'] = renamed_state

        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        init_range = model_opt.param_init
        if init_range != 0.0:
            for module in (model, generator):
                for p in module.parameters():
                    p.data.uniform_(-init_range, init_range)
        if model_opt.param_init_glorot:
            for module in (model, generator):
                for p in module.parameters():
                    if p.dim() > 1:
                        xavier_uniform_(p)

        # NOTE(review): if model.encoders is a dict-like container this
        # iterates its keys, not the encoder modules - confirm.
        for encoder in model.encoders:
            if hasattr(encoder, 'embeddings'):
                encoder.embeddings.load_pretrained_vectors(
                    model_opt.pre_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec)

    model.generator = generator
    model.to(device)
    print(model)
    return model
def __init__(self, device, para_LSR=0.2):
    """Initialise the loss module's settings and its log-softmax layer.

    Args:
        device: Stored unchanged as ``self.device``.
        para_LSR: Stored unchanged as ``self.para_LSR`` (default 0.2);
            presumably the label-smoothing strength -- confirm in ``forward``.
    """
    super(CrossEntropyLoss_LSR, self).__init__()
    # Log-probabilities are taken over the last dimension of the input.
    self.logSoftmax = nn.LogSoftmax(dim=-1)
    self.device = device
    self.para_LSR = para_LSR
def main():
    """Train a VGG16-based 100-way model and checkpoint it after every epoch.

    The model is a pretrained VGG16 followed by a log-softmax over its 1000
    outputs and a ``Linear(1000, 100)`` layer, optimised with SGD. For every
    epoch in ``range(Startepoch, Endepoch)`` the function adjusts the
    learning rate, runs ``train`` and ``validate``, and calls
    ``save_checkpoint`` with a flag telling whether the validation precision
    is the best seen so far. The total wall-clock duration (in hours) is
    printed at the end.

    Relies on module-level names defined elsewhere in the file: ``L_Rate``,
    ``Startepoch``, ``Endepoch``, ``best_prec1``, ``AgeEstimationDataset``,
    ``Rescale``, ``RandomCrop``, ``ToTensor``, ``adjust_learning_rate``,
    ``train``, ``validate`` and ``save_checkpoint``. Requires CUDA.
    """
    global print_freq, best_prec1
    print_freq = 10

    # Values of the 'Weights' column of the training CSV, moved to the GPU.
    # NOTE(review): weights_var is never passed to a criterion below, so the
    # losses are currently unweighted -- confirm whether that is intended.
    df = pd.read_csv(
        '/hampiholidata/Project/Datasets/imdb/Augment/IMDB_Wiki_Adience_Train2000_undersample_dis.csv'
    )
    wghts = df['Weights']
    nwghts = np.array(wghts)
    weights = torch.from_numpy(nwghts)
    weights_var = weights.float().cuda()

    # create model
    model = models.vgg16(pretrained=True)
    model = nn.Sequential(
        model,
        # dim is explicit: the implicit-dim form is deprecated, and dim=1 is
        # what it resolves to for the 2-D (batch, 1000) VGG output.
        nn.LogSoftmax(dim=1),
        nn.Linear(1000, 100),
    )
    model.cuda()
    print(model)

    # define loss function (criterion) and optimizer
    criterion1 = nn.CrossEntropyLoss().cuda()
    criterion2 = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                L_Rate,
                                momentum=0.9,
                                weight_decay=1e-4)
    cudnn.benchmark = True

    # ImageNet normalisation. NOTE(review): it is built but not part of the
    # transform pipelines below (it was commented out there).
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # Loading the data
    transformed_train_dataset = AgeEstimationDataset(
        csv_file='imdb/Augment/IMDB_Wiki_Adience_Train2000_undersample.csv',
        root_dir='imdb/',
        transform=transforms.Compose([
            Rescale(256),
            RandomCrop(224),
            ToTensor(),
        ]))
    transformed_valid_dataset = AgeEstimationDataset(
        csv_file='imdb/Augment/IMDB_Wiki_Adience_Val.csv',
        root_dir='imdb/',
        transform=transforms.Compose([
            Rescale(256),
            RandomCrop(224),
            ToTensor(),
        ]))
    train_loader = torch.utils.data.DataLoader(transformed_train_dataset,
                                               batch_size=32,
                                               shuffle=True,
                                               num_workers=8)
    val_loader = torch.utils.data.DataLoader(transformed_valid_dataset,
                                             batch_size=32,
                                             shuffle=True,
                                             num_workers=8)

    start_time = time.time()
    for epoch in range(Startepoch, Endepoch):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion1, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion2, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': 'vgg16',
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best, epoch)

    end_time = time.time()
    duration = (end_time - start_time) / 3600  # seconds -> hours
    print("Duration:")
    print(duration)
def _build_classifier_head(in_features, hidden_units, n_output):
    """Return the Linear-ReLU-Dropout-Linear-LogSoftmax head used by build_model."""
    hidden_units = int(hidden_units)
    return nn.Sequential(
        nn.Linear(in_features, hidden_units),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Linear(hidden_units, n_output),
        nn.LogSoftmax(dim=1),
    )


def build_model(gpu,
                arch='resnet18',
                learning_rate=0.001,
                hidden_units=250,
                features=0,
                train_dir='flowers/train/'):
    """Create a frozen pretrained backbone with a fresh trainable classifier.

    The backbone comes from ``utils.get_model(arch)``; all of its parameters
    are frozen and only the newly attached head is handed to the optimizer.
    The number of output classes is the number of sub-directories found in
    ``train_dir``.

    Args:
        gpu: ``True`` selects the CUDA device, anything else the CPU.
        arch: Architecture name understood by ``utils.get_model``.
        learning_rate: Adam learning rate (converted with ``float``).
        hidden_units: Width of the hidden layer of the head (converted with
            ``int``). Not used by the SqueezeNet branch.
        features: Ignored on input; overwritten by the value returned from
            ``utils.get_model``.
        train_dir: Directory whose sub-directories are counted as classes.

    Returns:
        Tuple ``(model, device, optimizer, criterion, features)``.
    """
    # Kept as an equality test so only True (or 1) selects CUDA, as before.
    device = torch.device('cuda' if gpu == True else 'cpu')  # noqa: E712
    print(type(arch))

    model, features = utils.get_model(arch)
    for param in model.parameters():
        param.requires_grad = False

    n_output = sum(
        os.path.isdir(os.path.join(train_dir, entry))
        for entry in os.listdir(train_dir))
    print(n_output)

    resnet_like = ('resnet101', 'resnet152', 'resnet18', 'resnet34',
                   'resnet50', 'inception_v3')
    densenets = ('densenet121', 'densenet161', 'densenet169', 'densenet201')
    squeezenets = ('squeezenet1_0', 'squeezenet1_1')

    if arch in resnet_like:
        model.fc = _build_classifier_head(features, hidden_units, n_output)
        trainable = model.fc
    elif arch in densenets:
        model.classifier = _build_classifier_head(features, hidden_units,
                                                  n_output)
        trainable = model.classifier
    elif arch in ('alexnet',):
        model.classifier = _build_classifier_head(9216, hidden_units,
                                                  n_output)
        trainable = model.classifier
    elif arch in squeezenets:
        # Size the final conv by the detected class count rather than a
        # hard-coded 102, matching every other branch.
        model.classifier[1] = nn.Conv2d(512,
                                        n_output,
                                        kernel_size=(1, 1),
                                        stride=(1, 1))
        # NLLLoss expects log-probabilities; the stock classifier ends in
        # ReLU + pooling, so add the same LogSoftmax the other heads have.
        # It acts on the channel dimension and has no parameters, so
        # state_dict keys are unchanged.
        model.classifier.add_module('log_softmax', nn.LogSoftmax(dim=1))
        trainable = model.classifier
    else:
        # Fallback head sized for a 25088-feature input.
        model.classifier = _build_classifier_head(25088, hidden_units,
                                                  n_output)
        trainable = model.classifier

    criterion = nn.NLLLoss()
    optimizer = optim.Adam(trainable.parameters(), lr=float(learning_rate))
    model.to(device)
    return model, device, optimizer, criterion, features
def main(opts):
    """Train the feed-forward speaker classifier with early stopping.

    Builds the training/validation loaders from ``opts``, writes the run
    configuration to ``train.opts``, then trains for up to ``opts.epoch``
    epochs. After every epoch the weights are saved (prefixed ``bestval_``
    when the validation loss improved), the accumulated statistics are dumped
    to ``train_stats.json`` and the curves are re-plotted to
    ``log_plots.png``. Training stops once the validation loss has failed to
    improve ``opts.patience`` times in a row.
    """
    loader_cfg = dict(batch_size=opts.batch_size,
                      num_workers=1,
                      shuffle=True,
                      pin_memory=False)
    dset = SpkDataset(opts.db_path,
                      opts.tr_list_file,
                      opts.ext,
                      opts.spk2idx,
                      in_frames=opts.in_frames)
    dloader = DataLoader(dset, **loader_cfg)
    va_dset = SpkDataset(opts.db_path,
                         opts.va_list_file,
                         opts.ext,
                         opts.spk2idx,
                         in_frames=opts.in_frames)
    va_dloader = DataLoader(va_dset, **loader_cfg)

    # Record the dataset-derived sizes so they are saved with the config.
    opts.input_dim = dset.input_dim
    opts.num_spks = dset.num_spks

    # save training config
    config_text = json.dumps(vars(opts), indent=2)
    with open(os.path.join(opts.save_path, 'train.opts'), 'w') as opts_f:
        opts_f.write(config_text)

    # Feed Forward Neural Network
    hidden = opts.hsize
    model = nn.Sequential(
        nn.Linear(dset.input_dim * dset.in_frames, hidden), nn.ReLU(),
        nn.Linear(hidden, hidden), nn.ReLU(),
        nn.Linear(hidden, hidden), nn.ReLU(),
        nn.Linear(hidden, dset.num_spks), nn.LogSoftmax(dim=1))
    print('Created model:')
    print(model)
    print('-')

    opt = optim.Adam(model.parameters(), lr=opts.lr)

    # Per-step / per-epoch curves, kept in the order they are serialised.
    history = {'tr_loss': [], 'tr_acc': [], 'va_loss': [], 'va_acc': []}
    # (series key, x label, y label) for the four subplot panels.
    panels = (
        ('tr_loss', 'Global step', 'Train NLL Loss'),
        ('tr_acc', 'Global step', 'Train Accuracy'),
        ('va_loss', 'Epoch', 'Eval NLL Loss'),
        ('va_acc', 'Epoch', 'Eval Accuracy'),
    )

    best_val = np.inf
    # Remaining tolerated epochs without improvement on held-out data.
    patience = opts.patience
    for epoch in range(opts.epoch):
        tr_loss_, tr_acc_ = train_spkid_epoch(dloader, model, opt, epoch,
                                              opts.log_freq)
        va_loss_, va_acc_ = eval_spkid_epoch(va_dloader, model, epoch,
                                             opts.log_freq)
        current_val = va_loss_[0]
        if best_val <= current_val:
            patience -= 1
            print('Val loss did not improve. Patience '
                  '{}/{}.'.format(patience, opts.patience))
            if patience <= 0:
                print('Breaking train loop: Out of patience')
                break
            ckpt_name = 'e{}_weights.ckpt'.format(epoch)
        else:
            # reset patience
            print('Val loss improved {:.3f} -> {:.3f}'.format(
                best_val, current_val))
            best_val = current_val
            patience = opts.patience
            ckpt_name = 'bestval_e{}_weights.ckpt'.format(epoch)

        # save model
        torch.save(model.state_dict(), os.path.join(opts.save_path, ckpt_name))

        history['tr_loss'] += tr_loss_
        history['tr_acc'] += tr_acc_
        history['va_loss'] += va_loss_
        history['va_acc'] += va_acc_
        stats_text = json.dumps(history, indent=2)
        with open(os.path.join(opts.save_path, 'train_stats.json'),
                  'w') as stats_f:
            stats_f.write(stats_text)

        # plot training loss/acc and eval loss/acc
        plt.figure(figsize=(15, 10))
        for position, (key, x_label, y_label) in enumerate(panels, start=1):
            plt.subplot(2, 2, position)
            plt.plot(history[key])
            plt.xlabel(x_label)
            plt.ylabel(y_label)
        plt.savefig(os.path.join(opts.save_path, 'log_plots.png'), dpi=200)
        plt.close()
def main():
    """Fine-tune a pretrained VGG16 or DenseNet161 on the flower data set.

    Loads the train/valid/test image folders under ``data_dir``, freezes the
    pretrained backbone, attaches a new 102-way classifier, trains it for
    ``epoch_count`` epochs while printing training/validation loss and
    validation accuracy per epoch, and finally writes ``checkpoint.pth`` and
    ``state_dict.pth``.

    Relies on module-level names defined elsewhere in the file: ``data_dir``,
    ``dev``, ``arch``, ``learnrate`` and ``epoch_count``.
    """
    ### define transformations for the data
    train_transforms = transforms.Compose([
        transforms.RandomRotation(60),
        transforms.Resize(255),
        transforms.CenterCrop(224),
        # The argument is a probability in [0, 1]; the default 0.5 flips
        # about half of the images (a value such as 30 flips every image).
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    valid_transforms = transforms.Compose([
        transforms.Resize(255),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    test_transforms = transforms.Compose([
        transforms.Resize(255),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    ### define paths to the train, validation, and test data sets
    train_dir = data_dir + '/train'
    valid_dir = data_dir + '/valid'
    test_dir = data_dir + '/test'

    ### load in the datasets
    train_data = datasets.ImageFolder(train_dir, transform=train_transforms)
    valid_data = datasets.ImageFolder(valid_dir, transform=valid_transforms)
    test_data = datasets.ImageFolder(test_dir, transform=test_transforms)

    ### set up dataloaders
    trainloader = torch.utils.data.DataLoader(train_data,
                                              batch_size=64,
                                              shuffle=True)
    validloader = torch.utils.data.DataLoader(valid_data,
                                              batch_size=64,
                                              shuffle=True)
    testloader = torch.utils.data.DataLoader(test_data, batch_size=64)

    ### define processor
    device = torch.device(dev)
    print("using device '{}'".format(device))

    ### define model architecture and optimizer
    if arch == 'vgg16':
        model = models.vgg16(pretrained=True)
        # VGG's classifier is a Sequential, so the input width lives on its
        # first Linear layer rather than on the classifier itself.
        class_in = model.classifier[0].in_features
    else:
        model = models.densenet161(pretrained=True)
        # DenseNet's classifier is a single Linear layer.
        class_in = model.classifier.in_features

    for param in model.parameters():
        param.requires_grad = False

    def make_classifier():
        """Return a freshly initialised copy of the 102-way classifier head."""
        return nn.Sequential(nn.Linear(class_in, 2000), nn.ReLU(),
                             nn.Dropout(p=0.2), nn.Linear(2000, 512),
                             nn.ReLU(), nn.Dropout(p=0.2),
                             nn.Linear(512, 102), nn.LogSoftmax(dim=1))

    model.classifier = make_classifier()
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), lr=learnrate)
    model = model.to(device)

    ### train the network
    epochs = epoch_count
    training_losses = []
    validation_losses = []
    model.train()
    for e in range(epochs):
        running_loss = 0
        for images, labels in trainloader:
            images = images.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            log_ps = model(images)
            loss = criterion(log_ps, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Validation pass after every training epoch.
        valid_loss = 0.0
        accuracy = 0.0
        model.eval()
        with torch.no_grad():
            for images, labels in validloader:
                images, labels = images.to(device), labels.to(device)
                logps = model(images)
                # .item() keeps the running total a plain float instead of a
                # tensor that stays on the device.
                valid_loss += criterion(logps, labels).item()
                ps = torch.exp(logps)
                top_p, top_class = ps.topk(1, dim=1)
                equals = top_class == labels.view(*top_class.shape)
                accuracy += torch.mean(equals.float()).item()
        model.train()

        training_losses.append(running_loss / len(trainloader))
        validation_losses.append(valid_loss / len(validloader))
        print("Epoch: {}/{}.. ".format(e + 1, epochs),
              "Training Loss: {:.3f}.. ".format(training_losses[-1]),
              "Test Loss: {:.3f}.. ".format(validation_losses[-1]),
              "Test Accuracy: {:.3f}".format(accuracy / len(validloader)))

    ### map from integer values to flower names
    # NOTE(review): cat_to_name is loaded but not used further in this
    # function.
    with open('cat_to_name.json', 'r') as f:
        cat_to_name = json.load(f)

    ### attach map as a parameter to the model
    model.class_to_idx = train_data.class_to_idx

    ### save model parameters
    checkpoint = {
        # Actual classifier input width (25088 for VGG16, differs for
        # DenseNet161).
        'input size': class_in,
        'output size': 102,
        'epochs': epochs,
        'model': model,
        # An untrained classifier of the same architecture; the trained
        # weights are inside 'model' and in state_dict.pth.
        'classifier': make_classifier(),
        'optimizer': optimizer.state_dict(),
        'class_to_idx': model.class_to_idx
    }
    torch.save(checkpoint, 'checkpoint.pth')

    # save the state dict
    torch.save(model.state_dict(), 'state_dict.pth')
def __init__(self, num_classes, epsilon):
    """Initialise the loss module's settings and its log-softmax layer.

    Args:
        num_classes: Stored unchanged as ``self.num_classes``.
        epsilon: Stored unchanged as ``self.epsilon``; presumably the
            label-smoothing weight -- confirm in ``forward``.
    """
    super(CrossEntropyLabelSmooth, self).__init__()
    # Log-probabilities are taken over dimension 1 of the input.
    self.logsoftmax = nn.LogSoftmax(dim=1)
    self.epsilon = epsilon
    self.num_classes = num_classes
def create_model(model_name) if (model_name.lower()=="vgg16"): model =models.vgg16(pretrained=True) else if (model_name.lower()=="vgg19"): model=models.vgg19(pretrained=True) print(model) for param in model.parameters(): param.requires_grad=False classifier = nn.Sequential(OrderedDict([ ('fc1', nn.Linear(25088, 4096)), # First layer ('relu', nn.ReLU()), # Apply activation function ('fc2', nn.Linear(4096, 102)), # Output layer ('output', nn.LogSoftmax(dim=1)) # Apply loss function ])) model.classifier = classifier def validate(model, criterion, data_loader): model.eval() # Puts model into validation mode accuracy = 0 test_loss = 0 for inputs, labels in iter(data_loader): if torch.cuda.is_available(): inputs = Variable(inputs.float().cuda(), volatile=True) labels = Variable(labels.long().cuda(), volatile=True) else:
def build_end2end_model(model_opt, fields, gpu, checkpoint=None, sel_checkpoint=None, s2s_gen_checkpoint=None):
    """
    Build the end-to-end model (selector, optional encoder, optional
    retrieved-keys encoder, decoder) together with its generator.

    Args:
        model_opt: the option loaded from checkpoint.
        fields: `Field` objects for the model.
        gpu(bool): whether to use gpu.
        checkpoint: the model generated by the train phase, or a resumed
                    snapshot model from a stopped training. When given, its
                    'end2end_model' and 'generator' states are loaded and no
                    parameter initialization is performed.
        sel_checkpoint: a pretrained selector; its 'selector' state is loaded
                    non-strictly into the model.
        s2s_gen_checkpoint: a pretrained checkpoint whose 'model' state is
                    loaded non-strictly into the model and whose 'generator'
                    state is loaded into the generator.
    Returns:
        the E2EModel, moved to the selected device, with the generator
        attached as `model.generator`.
    """
    assert model_opt.model_type in ["text"], \
        ("Unsupported model type %s" % (model_opt.model_type))

    # Build selector
    src_dict = fields["src"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'src')
    sel_src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
    selector = build_selector(model_opt, sel_src_embeddings)

    # Build encoder
    if model_opt.e2e_type == "separate_enc_sel":
        if model_opt.selector_share_embeddings:
            # the shared embeddings are in the encoder.embeddings
            # TODO: change the state name to load the embeddings in the pretrained selector embeddings
            assert model_opt.load_pretrained_selector_from == ''
            src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
            # Tie the encoder's word embedding weights to the selector's.
            src_embeddings.word_lut.weight = sel_src_embeddings.word_lut.weight
        else:
            src_embeddings = build_embeddings(model_opt, src_dict, feature_dicts)
        encoder = build_encoder(model_opt, src_embeddings)
    else:
        # model_opt.e2e_type == "share_enc_sel"
        # No separate encoder is built; the selector's embeddings are reused.
        src_embeddings = sel_src_embeddings
        encoder = None

    # build rk_encoder (only when retrieved keys are used)
    rk_encoder = None
    if model_opt.use_retrieved_keys:
        rk_encoder = build_rk_encoder(model_opt, sel_src_embeddings)

    # Build decoder
    tgt_dict = fields["tgt"].vocab
    feature_dicts = inputters.collect_feature_vocabs(fields, 'tgt')
    tgt_embeddings = build_embeddings(model_opt, tgt_dict,
                                      feature_dicts, for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        # src/tgt vocab should be the same if `-share_vocab` is specified.
        if src_dict != tgt_dict:
            raise AssertionError('The `-share_vocab` should be set during '
                                 'preprocess if you use share_embeddings!')

        tgt_embeddings.word_lut.weight = src_embeddings.word_lut.weight

    decoder = build_decoder(model_opt, tgt_embeddings)

    # Build E2EModel(= encoders + selector + decoder).
    device = torch.device("cuda" if gpu else "cpu")
    model = onmt.models.E2EModel(encoder, rk_encoder, selector, decoder,
                                 e2e_type=model_opt.e2e_type,
                                 use_gt_sel_probs=model_opt.use_gt_sel_probs,
                                 rk_to_src_attn=model_opt.rk_to_src_attn)
    model.model_type = model_opt.model_type

    # Build Generator.
    if not model_opt.copy_attn:
        generator = nn.Sequential(
            nn.Linear(model_opt.rnn_size, len(fields["tgt"].vocab)),
            nn.LogSoftmax(dim=-1))
        if model_opt.share_decoder_embeddings:
            # Tie the output projection to the decoder's word embeddings.
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        generator = CopyGenerator(model_opt.rnn_size,
                                  fields["tgt"].vocab)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        model.load_state_dict(checkpoint['end2end_model'])
        generator.load_state_dict(checkpoint['generator'])
    else:
        # Uniform initialization in [-param_init, param_init] when enabled.
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        # Glorot initialization is applied afterwards to every parameter
        # with more than one dimension.
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)

        # Pretrained states are loaded after initialization; strict=False
        # lets a checkpoint that covers only part of the model be loaded.
        if sel_checkpoint is not None:
            model.load_state_dict(sel_checkpoint['selector'], strict=False)

        if s2s_gen_checkpoint is not None:
            model.load_state_dict(s2s_gen_checkpoint['model'], strict=False)
            generator.load_state_dict(s2s_gen_checkpoint['generator'])

        # if hasattr(model.encoder, 'embeddings'):
        #     model.encoder.embeddings.load_pretrained_vectors(
        #         model_opt.pre_word_vecs_enc, model_opt.fix_word_vecs_enc)
        # if hasattr(model.decoder, 'embeddings'):
        #     model.decoder.embeddings.load_pretrained_vectors(
        #         model_opt.pre_word_vecs_dec, model_opt.fix_word_vecs_dec)

    # Add generator to model (this registers it as parameter of model).
    model.generator = generator
    model.to(device)

    return model
transform=RPFAugmentation(size=args.img_size)) valid_dataset = HabitatDataset(args, valid_list, transform=RPFAugmentation(size=args.img_size)) rpf = build_net('train', args) if torch.cuda.device_count() > 1: print("Use", torch.cuda.device_count(), "GPUs") rpf = nn.DataParallel(rpf) if args.cuda: rpf.cuda() cudnn.benchmark = True criterion = nn.CrossEntropyLoss() logsoftmax = nn.LogSoftmax() optimizer = optim.Adam(rpf.parameters(), lr=args.lr, weight_decay=args.wd) def train(): rpf.train() lr = args.lr epoch = disp_loss = 0 eval_loss = 10000. start_time = time.time() epoch_size = len(train_dataset) // args.batch_size max_epoch = int(args.max_iter / epoch_size) step_values = [10000, 50000, 100000] step_index = 0 batch_iterator = data.DataLoader(dataset=train_dataset, batch_size=args.batch_size,
def get_generator(vocab_size, dec_hidden_size, device):
    """Build the output layer mapping decoder states to vocabulary log-probs.

    Args:
        vocab_size: Number of output classes of the linear projection.
        dec_hidden_size: Size of the decoder hidden state fed to the layer.
        device: Device the returned module is moved to.

    Returns:
        An ``nn.Sequential`` of ``Linear(dec_hidden_size, vocab_size)``
        followed by ``LogSoftmax`` over the last dimension, on ``device``.
    """
    projection = nn.Linear(dec_hidden_size, vocab_size)
    to_log_probs = nn.LogSoftmax(dim=-1)
    # Module.to() moves the parameters in place and returns the module.
    return nn.Sequential(projection, to_log_probs).to(device)
def cross_entropy(pred, soft_targets):
    """Return the batch-mean cross-entropy between logits and soft targets.

    Use ``nn.CrossEntropyLoss`` instead when the targets are hard class
    indices rather than soft labels.

    Args:
        pred: Unnormalised scores; log-softmax is taken over dimension 1.
        soft_targets: Target weights with the same shape as ``pred``. They are
            moved to ``pred``'s device, so no module-level ``device`` global
            is needed.

    Returns:
        A scalar tensor: ``mean(sum(-soft_targets * log_softmax(pred), 1))``.
    """
    soft_targets = soft_targets.to(pred.device)
    log_probs = torch.log_softmax(pred, dim=1)
    return torch.mean(torch.sum(-soft_targets * log_probs, 1))