def __init__(self, growth_rate, block_config, num_init_features,
             num_classes, bn_size=4, drop_rate=0, memory_efficient=False,
             pretrained=None, optimizer_name="Adam", learning_rate=1e-3,
             loss_name="NLLLoss", metrics=None, use_cuda=False, **kwargs):
    """ Class initialization.

    Builds the DenseNet network, stores it in 'self.model', then hands the
    training related options over to the parent class initializer.

    Parameters
    ----------
    growth_rate: int
        how many filters to add each layer ('k' in paper).
    block_config: tuple of int
        how many layers in each pooling block.
    num_init_features: int
        the number of filters to learn in the first convolution layer.
    num_classes: int
        number of classification classes.
    bn_size: int, default 4
        multiplicative factor for number of bottleneck layers
        (i.e. bn_size * k features in the bottleneck layer).
    drop_rate: float, default 0
        dropout rate after each dense layer.
    memory_efficient: bool, default False
        if True, uses checkpointing. Much more memory efficient,
        but slower.
    pretrained: str, default None
        update the weights of the model using this state information.
    optimizer_name: str, default 'Adam'
        the name of the optimizer: see 'torch.optim' for a description
        of available optimizers.
    learning_rate: float, default 1e-3
        the optimizer learning rate.
    loss_name: str, default 'NLLLoss'
        the name of the loss: see 'torch.nn' for a description
        of available losses.
    metrics: list of str
        a list of extra metrics that will be computed.
    use_cuda: bool, default False
        whether to use GPU or CPU.
    kwargs: dict
        specify directly a custom 'optimizer' or 'loss'. Can also be used
        to set specific optimizer parameters.
    """
    # Architecture-only settings go to the network constructor.
    network_settings = {
        "growth_rate": growth_rate,
        "block_config": block_config,
        "num_init_features": num_init_features,
        "bn_size": bn_size,
        "drop_rate": drop_rate,
        "num_classes": num_classes,
        "memory_efficient": memory_efficient,
    }
    # The network is attached before the parent initializer runs, exactly
    # as the parent class is called with 'pretrained' afterwards.
    self.model = models.DenseNet(**network_settings)

    # Everything related to optimisation is forwarded to the parent class.
    training_settings = {
        "optimizer_name": optimizer_name,
        "learning_rate": learning_rate,
        "loss_name": loss_name,
        "metrics": metrics,
        "use_cuda": use_cuda,
        "pretrained": pretrained,
    }
    super().__init__(**training_settings, **kwargs)
def download_model(saving_path='.'):
    """Download pretrained weights for several architectures into ``saving_path``.

    For each architecture a model is instantiated and the downloaded state
    dict is loaded into it, which doubles as a check that the weights match
    the architecture. The models themselves are discarded; the function
    returns ``None``.

    Architectures covered: ResNet-152, AlexNet, VGG-19 (batch norm),
    SqueezeNet 1.1, DenseNet-201, GoogLeNet and ResNeXt-101 32x8d.

    :param saving_path: directory handed to ``model_zoo.load_url`` as
        ``model_dir`` for every download.
    """
    def fetch_weights(url_key):
        # One code path for every download so that all weights, GoogLeNet
        # included, land in ``saving_path`` with a progress bar.
        return model_zoo.load_url(model_urls[url_key],
                                  model_dir=saving_path,
                                  progress=True)

    # ResNet-152
    model = models.ResNet(_Bottleneck, [3, 8, 36, 3])
    model.load_state_dict(fetch_weights('resnet152'))

    # AlexNet
    model = models.AlexNet()
    model.load_state_dict(fetch_weights('alexnet'))

    # VGG-19 with batch normalisation
    model = models.VGG(_vgg_make_layers(_vgg_cfg['E'], batch_norm=True),
                       init_weights=False)
    model.load_state_dict(fetch_weights('vgg19_bn'))

    # SqueezeNet 1.1
    model = models.SqueezeNet(version=1.1)
    model.load_state_dict(fetch_weights('squeezenet1_1'))

    # DenseNet-201: keys of the form "...norm.1.weight" are rewritten to
    # "...norm1.weight" before loading (the dot before the index is dropped).
    model = models.DenseNet(num_init_features=64, growth_rate=32,
                            block_config=(6, 12, 48, 32))
    pattern = re.compile(
        r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
    state_dict = fetch_weights('densenet201')
    for key in list(state_dict.keys()):
        res = pattern.match(key)
        if res:
            new_key = res.group(1) + res.group(2)
            state_dict[new_key] = state_dict[key]
            del state_dict[key]
    model.load_state_dict(state_dict)

    # GoogLeNet: built with the auxiliary heads so the state dict loads,
    # then the heads are switched off and removed.
    model = models.GoogLeNet(transform_input=True, aux_logits=True,
                             init_weights=False)
    model.load_state_dict(fetch_weights('googlenet'))
    model.aux_logits = False
    del model.aux1, model.aux2

    # ResNeXt-101 32x8d
    model = models.resnext101_32x8d(pretrained=False)
    model.load_state_dict(fetch_weights('resnext101_32x8d'))
def load_model_merged(name, num_classes):
    """Create a model with ``num_classes`` outputs, merged with pretrained weights.

    For non-DenseNet names the model is created through the ``models``
    factory, the pretrained state is downloaded, and every entry reported by
    ``diff_states`` is overwritten with the freshly initialised value before
    the state is loaded.

    For DenseNet names the function returns early with a bare
    ``models.DenseNet`` instance: no pretrained weights are loaded and no
    diff is returned. This matches the historical behaviour and is kept for
    callers that rely on it.

    :param name: key into ``models.__dict__`` and ``model_urls``.
    :param num_classes: number of output classes of the new model.
    :return: ``(model, diff)`` for non-DenseNet names, where ``diff`` is the
        list of ``(state_name, value)`` pairs that were replaced; just the
        model for DenseNet names.
    :raises KeyError: if ``name`` is not exported by ``models``.
    :raises ValueError: for a DenseNet name without a known configuration.
    """
    # Look the factory up first so unknown names still fail with KeyError,
    # but do not call it yet: DenseNet names build their model explicitly
    # and the factory result would be thrown away.
    factory = models.__dict__[name]

    # Densenets don't (yet) pass on num_classes, hack it in.
    if "densenet" in name:
        # name -> (num_init_features, growth_rate, block_config)
        densenet_configs = {
            'densenet169': (64, 32, (6, 12, 32, 32)),
            'densenet121': (64, 32, (6, 12, 24, 16)),
            'densenet201': (64, 32, (6, 12, 48, 32)),
            'densenet161': (96, 48, (6, 12, 36, 24)),
        }
        if name not in densenet_configs:
            raise ValueError(
                "Cirumventing missing num_classes kwargs not implemented for %s"
                % name)
        num_init_features, growth_rate, block_config = densenet_configs[name]
        return models.DenseNet(num_init_features=num_init_features,
                               growth_rate=growth_rate,
                               block_config=block_config,
                               num_classes=num_classes)

    model = factory(num_classes=num_classes)
    pretrained_state = model_zoo.load_url(model_urls[name])

    # Diff
    diff = list(diff_states(model.state_dict(), pretrained_state))
    print("Replacing the following state from initialized", name, ":",
          [d[0] for d in diff])

    # Use a distinct loop variable so the ``name`` argument is not clobbered.
    for state_name, value in diff:
        pretrained_state[state_name] = value

    # After the replacement the two states must agree.
    assert len(list(diff_states(model.state_dict(), pretrained_state))) == 0

    # Merge
    model.load_state_dict(pretrained_state)
    return model, diff
def get_model(model_name, output_size, pretrained=True, feature_size=256,
              pool=False, dropout=0.1, amp=False):
    """Build ``SplitCT -> backbone -> dropout -> linear`` as one ``nn.Sequential``.

    The backbone's final layer is replaced so that it emits ``feature_size``
    values: either a new ``nn.Linear`` or, when ``pool`` is true, an
    ``nn.AdaptiveMaxPool1d`` wrapped in ``NoopAddDim`` / ``NoopSqueezeDim``.

    :param model_name: selects the backbone by prefix: ``resne*`` and
        ``densenet*`` from ``models``, ``se*`` from ``pretrainedmodels``,
        ``my_densenet*`` (a custom three-block DenseNet), or an
        ``efficientnet_b0..b3`` / ``tf_*`` model from ``geffnet``.
    :param output_size: number of outputs of the final linear layer.
    :param pretrained: forwarded to the backbone factory where supported.
    :param feature_size: width of the replaced backbone head.
    :param pool: replace the head with adaptive max pooling instead of a
        linear layer.
    :param dropout: dropout probability applied before the final layer.
    :param amp: wrap the backbone in ``AutocastModule``.
    :return: the assembled ``nn.Sequential``.
    :raises ValueError: if ``model_name`` matches no known backbone.
    """
    if model_name.startswith('resne'):
        model = getattr(models, model_name)(pretrained=pretrained)
        last_num = model.fc.in_features
        last = 'fc'
    elif model_name.startswith('se'):
        model = pretrainedmodels.__dict__[model_name](
            num_classes=1000, pretrained='imagenet' if pretrained else None)
        model.dropout = None
        last_num = model.last_linear.in_features
        last = 'last_linear'
    elif model_name.startswith('densenet'):
        model = getattr(models, model_name)(pretrained=pretrained)
        last_num = model.classifier.in_features
        last = 'classifier'
    elif model_name.startswith('my_densenet'):
        model = models.DenseNet(32,
                                block_config=(6, 12, 32),
                                num_init_features=64,
                                num_classes=output_size)
        # This branch previously left ``last`` / ``last_num`` unset, so the
        # head replacement below failed with UnboundLocalError. DenseNet's
        # head is its ``classifier`` layer, as in the ``densenet`` branch.
        last_num = model.classifier.in_features
        last = 'classifier'
    elif model_name in [
            'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2',
            'efficientnet_b3'
    ] or model_name.startswith('tf_'):
        model = geffnet.create_model(model_name, pretrained=pretrained)
        last_num = model.classifier.in_features
        last = 'classifier'
    else:
        raise ValueError('no model named ' + model_name)

    # Swap the backbone head for one that emits ``feature_size`` values.
    if pool:
        new_head = nn.Sequential(NoopAddDim(1),
                                 nn.AdaptiveMaxPool1d(feature_size),
                                 NoopSqueezeDim(1))
    else:
        new_head = nn.Linear(last_num, feature_size)
    setattr(model, last, new_head)

    if amp:
        model = AutocastModule(model)

    sq = OrderedDict([('wso', SplitCT()),
                      ('base_model', model),
                      ('drop_out', nn.Dropout(dropout)),
                      ('last_linear', nn.Linear(feature_size, output_size))])
    return nn.Sequential(sq)
def load_model(model_name):
    """Instantiate one supported network and load its locally stored weights.

    The weight file is ``MODEL_NAME[model_name]`` inside
    ``PRETRAINED_DEEP_MODEL_DIR``; tensors are mapped to the first CUDA
    device when one is available and to the CPU otherwise.

    :param model_name: one of 'VGG', 'ResNet', 'DenseNet', 'AlexNet',
        'GoogleNet', 'ResNext101'.
    :return: the model with its weights loaded.
    :raises ValueError: for any other ``model_name``.
    """
    # Detect if we have a GPU available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    def read_checkpoint():
        # Resolved lazily so that an unsupported name reaches the
        # ValueError below instead of failing on the MODEL_NAME lookup.
        checkpoint_path = os.path.join(PRETRAINED_DEEP_MODEL_DIR,
                                       MODEL_NAME[model_name])
        return torch.load(checkpoint_path, map_location=device)

    if model_name == 'ResNet':
        model = models.resnet152(pretrained=False)
        model.load_state_dict(read_checkpoint())
        return model

    if model_name == 'AlexNet':
        model = models.AlexNet()
        model.load_state_dict(read_checkpoint())
        return model

    if model_name == 'VGG':
        model = models.VGG(_vgg_make_layers(_vgg_cfg['E'], batch_norm=True),
                           init_weights=False)
        model.load_state_dict(read_checkpoint())
        return model

    if model_name == 'DenseNet':
        model = models.DenseNet(num_init_features=64, growth_rate=32,
                                block_config=(6, 12, 48, 32))
        key_pattern = re.compile(
            r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
        weights = read_checkpoint()
        # Rewrite "...norm.1.weight" style keys to "...norm1.weight".
        for old_key in list(weights.keys()):
            matched = key_pattern.match(old_key)
            if matched is None:
                continue
            weights[matched.group(1) + matched.group(2)] = weights[old_key]
            del weights[old_key]
        model.load_state_dict(weights)
        return model

    if model_name == 'GoogleNet':
        # Built with auxiliary heads so the checkpoint loads; the heads are
        # then disabled and removed.
        model = models.GoogLeNet(transform_input=True, aux_logits=True,
                                 init_weights=False)
        model.load_state_dict(read_checkpoint())
        model.aux_logits = False
        del model.aux1, model.aux2
        return model

    if model_name == 'ResNext101':
        model = models.resnext101_32x8d(pretrained=False)
        model.load_state_dict(read_checkpoint())
        return model

    raise ValueError("Model name must be one of ['VGG', 'ResNet', 'DenseNet', 'AlexNet', 'GoogleNet', 'ResNext101']")
def get_model(model_path=None):
    """Load a DenseNet (growth 32, blocks 6-12-24-16, 64 initial features) in eval mode.

    :param model_path: location of the saved state dict, passed to
        ``torch.load``.
    :return: the model with weights loaded, switched to evaluation mode.
    """
    net = models.DenseNet(32, (6, 12, 24, 16), 64)

    weights = torch.load(model_path, map_location=None)

    # Checkpoint keys such as "...norm.1.weight" are renamed to
    # "...norm1.weight" (the dot before the layer index is dropped).
    legacy_key = re.compile(
        r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$'
    )
    for old_key in tuple(weights.keys()):
        hit = legacy_key.match(old_key)
        if hit is None:
            continue
        renamed = "".join(hit.groups())
        weights[renamed] = weights[old_key]
        del weights[old_key]

    net.load_state_dict(weights)
    net.eval()
    return net
def load_densenet(self, model_path):
    """Load the pretrained DenseNet-121 model and return its feature extractor.

    :param model_path: location of the saved state dict, passed to
        ``torch.load``.
    :return: the ``features`` sub-module of the loaded network.
    """
    net = models.DenseNet(num_init_features=64,
                          growth_rate=32,
                          block_config=(6, 12, 24, 16))

    legacy_key = re.compile(
        r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$'
    )
    weights = torch.load(model_path)

    # Rename "...norm.1.weight" style keys to "...norm1.weight".
    for old_key in tuple(weights.keys()):
        hit = legacy_key.match(old_key)
        if hit is None:
            continue
        renamed = "".join(hit.groups())
        weights[renamed] = weights[old_key]
        del weights[old_key]

    net.load_state_dict(weights)
    return net.features
def main(string_name, nr_epochs, batch_size):
    """Train a small viewpoint classifier on top of a coarse DenseNet.

    Loads the dataset, builds train/val dataloaders, restores the matching
    entries of a saved checkpoint into a default ``models.DenseNet``, builds
    a three-layer classification head, and hands everything to
    ``train_model``. Only the head's parameters are given to the optimizer.

    :param string_name: forwarded unchanged to ``train_model``.
    :param nr_epochs: number of epochs, forwarded to ``train_model``.
    :param batch_size: batch size of both dataloaders.
    """
    data_dir = '/home/efklidis/Desktop/Front_back/data/'

    print("Initializing dataset...")
    image_datasets = dataset(data_dir)
    print("Dataset is loaded.")

    print("\nInitializing dataloaders...")
    # Only the training split is shuffled.
    dataloaders = {
        split: torch.utils.data.DataLoader(image_datasets[split],
                                           batch_size=batch_size,
                                           shuffle=(split == 'train'),
                                           num_workers=4)
        for split in ('train', 'val')
    }
    print("Dataloaders are booted.")

    print("\nInitializing viewpoint classification model and loading the coarse trained one...")
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    coarse_model = models.DenseNet()
    merged_weights = coarse_model.state_dict()
    checkpoint = torch.load('/home/efklidis/Desktop/triplet_BA.pt', map_location='cpu')
    # Take over only those checkpoint entries the model actually has.
    for key, tensor in checkpoint.items():
        if key in merged_weights:
            merged_weights[key] = tensor
    coarse_model.load_state_dict(merged_weights)
    coarse_model = coarse_model.to(device)

    head_layers = [
        torch.nn.Linear(1024, 256),
        torch.nn.ReLU(),
        torch.nn.Linear(256, 2),
    ]
    view_cls = torch.nn.Sequential(*head_layers).to(device)
    print("Models are mounted on device:", str(device))

    print("\nInitializing optimizer...")
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(view_cls.parameters(), lr=0.01, momentum=0.9)
    lr_milestones = [40, 80, 100, 120, 140, 160, 180]
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=lr_milestones,
                                               gamma=0.9)
    print("Set.")

    print("\nTraining for ", str(nr_epochs), "epochs...")
    view_cls = train_model(view_cls, coarse_model, dataloaders, criterion,
                           optimizer, nr_epochs, scheduler, string_name,
                           device)
def _get_untrained_model(model_name, num_classes):
    """
    Primarily, this method exists to return an untrained / vanilla version of a specified (pretrained) model.
    This is on best-attempt basis only and may be out of sync with actual model definitions. The code is manually maintained.

    Names are matched by prefix, in the order listed below.

    :param model_name: Lower-case model names are pretrained by convention.
    :param num_classes: Number of classes to initialize the vanilla model with.
    :return: default model for the model_name with custom number of classes
    :raises ValueError: if no vanilla model is known for ``model_name``.
    """
    if model_name.startswith('bninception'):
        return classification.BNInception(num_classes=num_classes)
    if model_name.startswith('densenet'):
        return torch_models.DenseNet(num_classes=num_classes)
    if model_name.startswith('dpn'):
        return classification.DPN(num_classes=num_classes)
    if model_name.startswith('inceptionresnetv2'):
        return classification.InceptionResNetV2(num_classes=num_classes)
    if model_name.startswith('inception_v3'):
        return torch_models.Inception3(num_classes=num_classes)
    if model_name.startswith('inceptionv4'):
        return classification.InceptionV4(num_classes=num_classes)
    if model_name.startswith('nasnetalarge'):
        return classification.NASNetALarge(num_classes=num_classes)
    if model_name.startswith('nasnetamobile'):
        return classification.NASNetAMobile(num_classes=num_classes)
    if model_name.startswith('pnasnet5large'):
        return classification.PNASNet5Large(num_classes=num_classes)
    if model_name.startswith('polynet'):
        return classification.PolyNet(num_classes=num_classes)
    if model_name.startswith('pyresnet'):
        return classification.PyResNet(num_classes=num_classes)
    if model_name.startswith('resnet'):
        # torch_models.ResNet cannot be built from num_classes alone: its
        # constructor also requires a block type and per-stage layer counts,
        # so the previous ResNet(num_classes=...) call always raised
        # TypeError. Use the matching torchvision factory instead, in the
        # same way the squeezenet branches below do.
        for arch in ('resnet18', 'resnet34', 'resnet50', 'resnet101',
                     'resnet152'):
            if model_name.startswith(arch):
                factory = getattr(torch_models, arch)
                return factory(num_classes=num_classes, pretrained=False)
        raise ValueError(
            'No vanilla model found for model name: {}'.format(model_name))
    if model_name.startswith('resnext101_32x4d'):
        return classification.ResNeXt101_32x4d(num_classes=num_classes)
    if model_name.startswith('resnext101_64x4d'):
        return classification.ResNeXt101_64x4d(num_classes=num_classes)
    if model_name.startswith('se_inception'):
        return classification.SEInception3(num_classes=num_classes)
    if model_name.startswith('se_resnext50_32x4d'):
        return classification.se_resnext50_32x4d(num_classes=num_classes, pretrained=None)
    if model_name.startswith('se_resnext101_32x4d'):
        return classification.se_resnext101_32x4d(num_classes=num_classes, pretrained=None)
    if model_name.startswith('senet154'):
        return classification.senet154(num_classes=num_classes, pretrained=None)
    if model_name.startswith('se_resnet50'):
        return classification.se_resnet50(num_classes=num_classes, pretrained=None)
    if model_name.startswith('se_resnet101'):
        return classification.se_resnet101(num_classes=num_classes, pretrained=None)
    if model_name.startswith('se_resnet152'):
        return classification.se_resnet152(num_classes=num_classes, pretrained=None)
    if model_name.startswith('squeezenet1_0'):
        return torch_models.squeezenet1_0(num_classes=num_classes, pretrained=False)
    if model_name.startswith('squeezenet1_1'):
        return torch_models.squeezenet1_1(num_classes=num_classes, pretrained=False)
    if model_name.startswith('xception'):
        return classification.Xception(num_classes=num_classes)
    raise ValueError(
        'No vanilla model found for model name: {}'.format(model_name))
def __init__(self, block_config=(3, 6, 12, 8), latentspace=200):
    """Build a DenseNet feature extractor with a linear projection head.

    :param block_config: number of layers in each dense block, forwarded
        to ``models.DenseNet``.
    :param latentspace: output width of the linear projection.
    """
    super(DenseNet60, self).__init__()
    original_model = models.DenseNet(block_config=block_config)
    # Keep every child module of the reference network except the last one.
    self.features = nn.Sequential(*list(original_model.children())[:-1])
    # Size the projection from the reference network's own classifier input
    # instead of a hard-coded 516: that value only holds for the default
    # block_config, any other configuration yields a different width.
    feature_width = original_model.classifier.in_features
    self.classifier = nn.Linear(feature_width, latentspace)
else: saved_model.append(file_path) if len(saved_model) > 0: sort_model = [int(os.path.split(os.path.splitext(model)[0])[-1]) for model in saved_model] sort_model.sort() last_epoch = sort_model[-1] latest_model = "{}.pth".format(os.path.join(model_path, str(last_epoch))) net = torch.load(latest_model) net = net.cuda() LOG.info("Load model: {}".format(latest_model)) else: net = models.DenseNet(num_init_features=model_init_features, growth_rate=model_growth_rate, block_config=model_config, num_classes=10) # net = nn.DataParallel(net) net = net.cuda() LOG.info("Initialize model") loss_func = nn.CrossEntropyLoss() # optimizer = optim.Adam(net.parameters(), lr=learning_rate, weight_decay=weight_decay) optimizer = optim.SGD(net.parameters(), lr=learning_rate, weight_decay=weight_decay, momentum=0.9, nesterov=True) scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=learning_milestones, gamma=learning_gamma) scheduler.last_epoch = last_epoch - 2 def train(model, loader, optm): total_loss = 0.0
def get_model(model_name,
              output_size,
              pretrained=True,
              extra=None,
              mid_extra=0,
              extra_activation=nn.ReLU(),
              mlps=None,
              dropout=0,
              bn=False,
              patient_emdb=None,
              return_features=False):
    """Build a classification model, optionally with an extended head.

    The backbone is chosen by ``model_name`` and its final layer is replaced
    by a linear layer with ``output_size`` outputs. Then, either

    * the model is wrapped in ``ExtraModel`` (when ``extra`` is given and
      ``mid_extra > 0``), or
    * the final layer is replaced by an MLP described by ``mlps``, or
    * the model is returned as is.

    :param model_name: ``resne*`` / ``densenet*`` (from ``models``),
        ``my_densenet*``, ``efficientnet_b0..b3`` / ``tf_*`` (``geffnet``),
        ``'DCTConvModel'`` or ``'xception'`` (``pretrainedmodels``).
    :param output_size: number of model outputs.
    :param pretrained: load pretrained weights where the backbone supports it.
    :param extra: forwarded to ``ExtraModel``; ``None`` disables the wrapper.
    :param mid_extra: forwarded to ``ExtraModel`` as ``mid_linear``; must be
        positive for the wrapper to be used.
    :param extra_activation: activation module used in the MLP head and
        forwarded to ``ExtraModel``. The same instance is reused for every
        MLP layer.
    :param mlps: hidden layer widths of the MLP head; ``None`` or empty
        means no MLP head.
    :param dropout: dropout probability inside the MLP head (0 disables).
    :param bn: add batch normalisation after each MLP activation.
    :param patient_emdb: forwarded to ``ExtraModel``.
    :param return_features: forwarded to ``ExtraModel``.
    :return: the assembled model.
    :raises ValueError: for an unknown ``model_name``, or when a head
        extension is requested for a backbone whose head layer is unknown.
    """
    # Avoid a shared mutable default; None and [] both mean "no MLP head".
    mlps = [] if mlps is None else list(mlps)

    # Attribute name of the backbone's final layer; stays None for backbones
    # whose head is not known here.
    last = None

    if model_name.startswith('resne'):
        model = getattr(models, model_name)(pretrained=pretrained)
        model.fc = nn.Linear(model.fc.in_features, output_size)
        last = 'fc'
    elif model_name.startswith('densenet'):
        model = getattr(models, model_name)(pretrained=pretrained)
        model.classifier = nn.Linear(model.classifier.in_features,
                                     output_size)
        last = 'classifier'
    elif model_name.startswith('my_densenet'):
        model = models.DenseNet(32,
                                block_config=(6, 12, 32),
                                num_init_features=64,
                                num_classes=output_size)
        # Previously unset here, which made the head extensions below fail
        # with UnboundLocalError. DenseNet's head is ``classifier``.
        last = 'classifier'
    elif model_name in [
            'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2',
            'efficientnet_b3'
    ] or model_name.startswith('tf_'):
        model = geffnet.create_model(model_name, pretrained=pretrained)
        model.classifier = nn.Linear(model.classifier.in_features,
                                     output_size)
        last = 'classifier'
    elif model_name == 'DCTConvModel':
        model = DCTConvModel()
    elif model_name == 'xception':
        # Honour the ``pretrained`` flag instead of always fetching the
        # ImageNet weights.
        model = pretrainedmodels.__dict__[model_name](
            num_classes=1000, pretrained='imagenet' if pretrained else None)
        model.last_linear = nn.Linear(model.last_linear.in_features,
                                      output_size)
        last = 'last_linear'
    else:
        raise ValueError('no model named ' + model_name)

    use_extra = extra is not None and mid_extra > 0
    if (use_extra or mlps) and last is None:
        raise ValueError(
            'head layer of ' + model_name +
            ' is unknown; extra / mlps heads are not supported for it')

    if use_extra:
        model = ExtraModel(model,
                           last,
                           extra,
                           mid_linear=mid_extra,
                           extra_activation=extra_activation,
                           mlps=mlps,
                           dropout=dropout,
                           bn=bn,
                           patient_emdb=patient_emdb,
                           return_features=return_features)
    elif mlps:
        in_last = model._modules[last].in_features
        out_last = model._modules[last].out_features
        # Layer widths: backbone features followed by each hidden width.
        nmlps = [in_last] + mlps
        sq = OrderedDict()
        for i in range(len(mlps)):
            sq[f'mlp_linear{i}'] = nn.Linear(nmlps[i], nmlps[i + 1])
            sq[f'mlp_act{i}'] = extra_activation
            sq[f'mlp_bn{i}'] = nn.BatchNorm1d(nmlps[i + 1]) if bn else Noop()
            sq[f'mlp_dropout{i}'] = (nn.Dropout(dropout)
                                     if dropout > 0 else Noop())
        sq['last_linear'] = nn.Linear(nmlps[-1], out_last)
        model._modules[last] = nn.Sequential(sq)
    return model
def load_defined_model(name, num_classes):
    """Create a model with ``num_classes`` outputs, merged with pretrained weights.

    The pretrained state is downloaded, DenseNet keys of the form
    ``...norm.1.weight`` are renamed to ``...norm1.weight``, every entry
    reported by ``diff_states`` is overwritten with the freshly initialised
    value, and the merged state is loaded into the model.

    :param name: key into ``models.__dict__`` and ``model_urls``.
    :param num_classes: number of output classes of the new model.
    :return: ``(model, diff)`` where ``diff`` is the list of
        ``(state_name, value)`` pairs taken from the fresh model.
    :raises KeyError: if ``name`` is not exported by ``models``.
    :raises ValueError: for a DenseNet name without a known configuration.
    """
    # Look the factory up first so unknown names still fail with KeyError,
    # but only call it for non-DenseNet models: DenseNets are built
    # explicitly below and a factory-built model would be thrown away.
    factory = models.__dict__[name]
    print(name)
    print(num_classes)

    # Densenets don't (yet) pass on num_classes, hack it in.
    # name -> (num_init_features, growth_rate, block_config)
    densenet_configs = {
        'densenet169': (64, 32, (6, 12, 32, 32)),
        'densenet121': (64, 32, (6, 12, 24, 16)),
        'densenet201': (64, 32, (6, 12, 48, 32)),
        'densenet161': (96, 48, (6, 12, 36, 24)),
    }
    if name in densenet_configs:
        num_init_features, growth_rate, block_config = densenet_configs[name]
        model = models.DenseNet(num_init_features=num_init_features,
                                growth_rate=growth_rate,
                                block_config=block_config,
                                num_classes=num_classes)
    elif name.startswith('densenet'):
        raise ValueError(
            "Cirumventing missing num_classes kwargs not implemented for %s" %
            name)
    else:
        model = factory(num_classes=num_classes)

    pretrained_state = model_zoo.load_url(model_urls[name])
    if name.startswith('densenet'):
        pattern = re.compile(
            r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$'
        )
        for key in list(pretrained_state.keys()):
            res = pattern.match(key)
            if res:
                new_key = res.group(1) + res.group(2)
                pretrained_state[new_key] = pretrained_state[key]
                del pretrained_state[key]

    # Remove num_batches_tracked entries before comparing the states.
    new_state = {
        key: value
        for key, value in model.state_dict().items()
        if not key.endswith('num_batches_tracked')
    }

    # Diff
    diff = list(diff_states(new_state, pretrained_state))
    # e.g. ['classifier.6.weight', 'classifier.6.bias'] in alexnet
    print("Replacing the following state from initialized", name, ":",
          [d[0] for d in diff])

    # Use a distinct loop variable so the ``name`` argument is not clobbered.
    for state_name, value in diff:
        pretrained_state[state_name] = value

    # After the replacement the two states must agree.
    assert len(list(diff_states(new_state, pretrained_state))) == 0

    # Merge
    model.load_state_dict(pretrained_state)
    return model, diff
"""
Testing the original DenseNet on internet images.

Jain et al. (2020) suggested transfer learning; this script runs the
original DenseNet on images fetched from the internet.
"""
""
import io
from PIL import Image
import requests
from torch.autograd import Variable
import torchvision.models as models
import torchvision.transforms as transforms

# NOTE(review): models.DenseNet() is called without arguments and no weights
# are loaded in the code visible here. If pretrained ImageNet weights are
# intended, confirm they are loaded further down.
DenseNet = models.DenseNet()  # This may take a few minutes.

# Random cat img taken from Google
IMG_URL = 'https://images.pexels.com/photos/104827/cat-pet-animal-domestic-104827.jpeg?auto=compress&cs=tinysrgb&dpr=2&h=650&w=940'

# Class mask used when training VGG as json, courtesy of the 'Example code' link above.
LABELS_URL = 'https://s3.amazonaws.com/outcome-blog/imagenet/labels.json'

# Let's get our class mask for the output.
response = requests.get(
    LABELS_URL)  # Make an HTTP GET request and store the response.
# The JSON keys are converted to int so labels can be looked up by index.
labels = {int(key): value for key, value in response.json().items()}

# Let's get the cat img.
response = requests.get(IMG_URL)
img = Image.open(io.BytesIO(
    response.content))  # Read bytes and store as an img.

# We can do all this preprocessing using a transform pipeline.
min_img_size = 224  # The min size, as noted in the PyTorch pretrained models doc, is 224 px.
# Adjust the number of samples used for testing yourself: change 50000 to
# another value if needed.
# Inputs and targets must be cut to the same integer length: the previous
# code sliced x with SPLIT_RATIO * 50000 (a float index when the ratio is
# fractional) but y with 50000, so the two could disagree in size.
n_train = int(SPLIT_RATIO * 50000)
# A channel axis is inserted at dim 1 and pixel values are divided by 255.
x = torch.unsqueeze(train_img, dim=1)[:n_train] / 255.
y = train_out[:n_train]

# mini-sample for testing
x_t = torch.unsqueeze(train_img, dim=1)[:300] / 255.
y_t = train_out[:300]

my_dataset = Data.TensorDataset(x, y)
train_loader = Data.DataLoader(dataset=my_dataset,
                               batch_size=BATCH_SIZE,
                               shuffle=True)

des = model.DenseNet(num_init_features=32, num_classes=10)
# A 5x5 convolution maps the single input channel to the three channels
# passed on to the DenseNet.
cnn = nn.Sequential(
    nn.Conv2d(1, 3, 5, 1, 2, bias=True),
    des,
)

optimizer = torch.optim.Adam(cnn.parameters(), lr=0.0001)
def training(model_name, trainloader, validloader, input_channel=3, epochs=1, resume=True, self_define=True, only_print=False):
    """Train one of the supported networks and plot its loss curves.

    :param model_name: one of "LeNet", "VGG16", "ResNet", "DenseNet".
    :param trainloader: iterable of batches; element 0 of a batch is used as
        inputs and element 1 as labels.
    :param validloader: passed to ``valid`` for the periodic validation loss.
    :param input_channel: passed to the self-defined network constructors.
    :param epochs: number of passes over ``trainloader``.
    :param resume: load parameters from the parameter file when it exists.
    :param self_define: use the self-defined networks instead of the
        ``models`` ones (LeNet is self-defined in both cases).
    :param only_print: print the network and return without training.
    :return: None.
    """
    # load self defined or official net
    assert model_name in ["LeNet", "VGG16", "ResNet", "DenseNet"]
    if self_define:
        if model_name == "LeNet":
            net = LeNet(input_channel)
        elif model_name == "VGG16":
            net = VGG16(input_channel)
        elif model_name == "ResNet":
            net = ResNet(input_channel)
        elif model_name == "DenseNet":
            net = DenseNet(input_channel)
    else:
        if model_name == "LeNet":
            net = LeNet(input_channel)  # on official LeNet
        elif model_name == "VGG16":
            net = models.vgg16_bn(pretrained=False, num_classes=10)
        elif model_name == "ResNet":
            net = models.resnet50(pretrained=False, num_classes=10)
        elif model_name == "DenseNet":
            net = models.DenseNet(num_classes=10)

    # sum of net parameters number
    print("Number of trainable parameters in %s : %f" % (model_name, sum(p.numel() for p in net.parameters() if p.requires_grad)))

    # print model structure
    if only_print:
        print(net)
        return

    # resume training
    # The parameter file name encodes the model and whether it is the
    # self-defined ("define") or the official variant.
    param_path = "./model/%s_%s_parameter.pt" % (model_name, "define" if self_define else "official")
    if resume:
        if os.path.exists(param_path):
            net.load_state_dict(torch.load(param_path))
            net.train()
            print("Resume training " + model_name)
        else:
            print("Train %s from scratch" % model_name)
    else:
        print("Train %s from scratch" % model_name)

    # define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    # train on GPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('train on %s' % device)
    net.to(device)

    running_loss = 0.0
    train_losses = []
    valid_losses = []
    # Number of mini-batches between two validation / logging points.
    mini_batches = 125 * 5
    for epoch in range(epochs):
        for i, data in enumerate(trainloader, 0):
            # get one batch
            # inputs, labels = data
            inputs, labels = data[0].to(device), data[1].to(device)

            # switch model to training mode, clear gradient accumulators
            net.train()
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % mini_batches == mini_batches - 1:  # print and valid every <mini_batches> mini-batches
                # validate model in validation dataset
                valid_loss = valid(net, validloader, criterion, device)
                print('[%d, %5d] train loss: %.3f, validset loss: %.3f' % (
                    epoch + 1, i + 1, running_loss / mini_batches, valid_loss))
                train_losses.append(running_loss / mini_batches)
                valid_losses.append(valid_loss)
                running_loss = 0.0

        # save parameters
        torch.save(net.state_dict(), param_path)

        # # save checkpoint
        # torch.save({
        #     'epoch': epoch,
        #     'model_state_dict': net.state_dict(),
        #     'optimizer_state_dict': optimizer.state_dict(),
        #     'loss': loss
        # }, "./checkpoints/epoch_" + str(epoch) + ".tar")

    # NOTE(review): batch_size is neither a parameter nor a local of this
    # function - presumably a module-level global; verify it is defined.
    print('Finished Training, %d images in all' % (len(train_losses) * batch_size * mini_batches / epochs))

    # draw loss curve
    assert len(train_losses) == len(valid_losses)
    loss_x = range(0, len(train_losses))
    plt.plot(loss_x, train_losses, label="train loss")
    plt.plot(loss_x, valid_losses, label="valid loss")
    plt.title("Loss for every %d mini-batch" % mini_batches)
    plt.xlabel("%d mini-batches" % mini_batches)
    plt.ylabel("Loss")
    plt.legend()
    plt.savefig(model_name + "_loss.png")
    plt.show()
help='check_point_dir', type=str, default='data/models') parser.add_argument('--loss', dest='loss', help='loss function', type=str, default='focal') args = parser.parse_args() # init densenet161 gpu = True net161 = modules.DenseNet(growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64, bn_size=4, drop_rate=0, num_classes=3) net161.load_state_dict('data/models_pretrained/densenet161-8d451a50.pth', strict=True) # init net # gpu = True # net = densenet.DenseNet( # num_classes=3, # depth=46, # growthRate=12, # compressionRate=2, # dropRate=0.3 # )
def evalidation(model_name, testloader, classes, input_channel=3, self_define=True):
    """Evaluate a trained network on ``testloader`` and print its accuracy.

    Shows the first batch with its ground-truth and predicted classes, then
    counts correct predictions per class over the whole loader and prints
    per-class and overall percentages.

    :param model_name: one of "LeNet", "VGG16", "ResNet", "DenseNet".
    :param testloader: iterable of batches; element 0 of a batch is used as
        inputs and element 1 as labels.
    :param classes: sequence of class names indexed by label.
    :param input_channel: passed to the self-defined network constructors.
    :param self_define: use the self-defined networks instead of the
        ``models`` ones (LeNet is self-defined in both cases).
    :return: None.
    """
    dataiter = iter(testloader)
    # Iterators are advanced with the built-in next(); they have no .next()
    # method on Python 3.
    images, labels = next(dataiter)

    # print images
    imshow(torchvision.utils.make_grid(images))
    # Iterate over the batch itself rather than range(batch_size): the batch
    # size is not defined in this function and a batch may be shorter.
    print('GroundTruth: ', ' '.join('%5s' % classes[label] for label in labels))

    # load model parameter
    assert model_name in ["LeNet", "VGG16", "ResNet", "DenseNet"]
    param_path = "./model/%s_%s_parameter.pt" % (model_name, "define" if self_define else "official")
    print("load model parameter from %s" % param_path)
    if self_define:
        if model_name == "LeNet":
            net = LeNet(input_channel)
        elif model_name == "VGG16":
            net = VGG16(input_channel)
        elif model_name == "ResNet":
            net = ResNet(input_channel)
        elif model_name == "DenseNet":
            net = DenseNet(input_channel)
    else:
        if model_name == "LeNet":
            net = LeNet(input_channel)
        elif model_name == "VGG16":
            net = models.vgg16_bn(pretrained=False, num_classes=10)
        elif model_name == "ResNet":
            net = models.resnet50(pretrained=False, num_classes=10)
        elif model_name == "DenseNet":
            net = models.DenseNet(num_classes=10)
    net.load_state_dict(torch.load(param_path))
    net.eval()

    # predict
    outputs = net(images)
    _, predicted = torch.max(outputs, 1)
    print('Predicted: ', ' '.join('%5s' % classes[guess] for guess in predicted))

    # to gpu
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net.to(device)

    # evaluate
    class_correct = np.zeros(10)
    class_total = np.zeros(10)
    with torch.no_grad():
        for data in testloader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)
            # Pair every label with its prediction; this also handles a
            # final batch that is smaller than the others.
            for label, guess in zip(labels, predicted):
                label = int(label)
                class_total[label] += 1
                if int(guess) == label:
                    class_correct[label] += 1

    print("\nEvery class precious: \n ",
          ' '.join("%5s : %2d %%\n" % (classes[i], 100 * class_correct[i]/class_total[i]) for i in range(len(classes))))
    print("\n%d images in all, Total precious: %2d %%"
          % (np.sum(class_total), 100 * np.sum(class_correct) / np.sum(class_total)))
pytest.param( "models.resnet", "ResNet", {"layers": [2, 2, 2, 2]}, [], {"block": Bottleneck}, models.ResNet(block=Bottleneck, layers=[2, 2, 2, 2]), id="ResNetConf", ), pytest.param( "models.densenet", "DenseNet", {}, [], {}, models.DenseNet(), id="DenseNetConf", ), pytest.param( "models.squeezenet", "SqueezeNet", {}, [], {}, models.SqueezeNet(), id="SqueezeNetConf", ), pytest.param( "models.mnasnet", "MNASNet", {"alpha": 1.0},