def compute_time():
    """
    Run 100 forward passes per model and report the average time per pass.
    :return:
    """
    model_alexnet = AlexNet(num_classes=1000)
    model_googlenet = googlenet.GoogLeNet(num_classes=1000)
    model_alexnet.eval()
    model_googlenet.eval()

    total_time_alexnet = 0.0
    total_time_googlenet = 0.0
    epoch = 100
    for i in range(epoch):
        data = torch.randn((1, 3, 224, 224))

        start = time.time()
        outputs = model_alexnet(data)
        end = time.time()
        total_time_alexnet += (end - start)

        start = time.time()
        outputs = model_googlenet(data)
        end = time.time()
        total_time_googlenet += (end - start)

    print('[alexnet] time: {:.4f}'.format(total_time_alexnet / epoch))
    print('[googlenet] time: {:.4f}'.format(total_time_googlenet / epoch))
    print('time_googlenet / time_alexnet: {:.3f}'.format(total_time_googlenet / total_time_alexnet))
def train_pipe(args, part='parameters'):
    torch.manual_seed(args.seed)
    deepspeed.runtime.utils.set_random_seed(args.seed)

    #
    # Build the model
    #

    # VGG also works :-)
    # net = vgg19(num_classes=10)
    net = AlexNet(num_classes=10)
    net = PipelineModule(layers=join_layers(net),
                         loss_fn=torch.nn.CrossEntropyLoss(),
                         num_stages=args.pipeline_parallel_size,
                         partition_method=part,
                         activation_checkpoint_interval=0)

    trainset = cifar_trainset(args.local_rank)

    engine, _, _, _ = deepspeed.initialize(
        args=args,
        model=net,
        model_parameters=[p for p in net.parameters() if p.requires_grad],
        training_data=trainset)

    for step in range(args.steps):
        loss = engine.train_batch()
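# `join_layers` is not defined in these snippets. Below is a minimal sketch (an
# assumption, not the original helper) of how a torchvision-style AlexNet could be
# flattened into the ordered list of callables that DeepSpeed's PipelineModule expects.
import torch


def join_layers(vision_model):
    # Expose each sub-layer individually so the pipeline engine can partition
    # the model across stages; the lambda reproduces the usual flatten step.
    return [
        *vision_model.features,
        vision_model.avgpool,
        lambda x: torch.flatten(x, 1),
        *vision_model.classifier,
    ]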
def pytorch_cos():
    model = AlexNet(num_classes=2)
    optimizer = optim.SGD(params=model.parameters(), lr=0.0001)

    epoch = 100
    len_loader = 100
    scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=2, T_mult=2,
                                                         eta_min=1e-6, last_epoch=-1)

    plt.figure()
    x = []
    y = []
    for e in range(epoch):
        for i in range(len_loader):
            step = e + i / len_loader
            scheduler.step(step)

            lr = scheduler.get_last_lr()[0]
            x.append(step)
            y.append(lr)

    plt.plot(x, y)
    plt.xticks(np.arange(0, epoch + 1, 4))
    plt.show()
def compute_param():
    model_alexnet = AlexNet(num_classes=1000)
    model_googlenet = googlenet.GoogLeNet(num_classes=1000)
    model_alexnet.eval()
    model_googlenet.eval()

    num_alexnet = util.num_model(model_alexnet)
    num_googlenet = util.num_model(model_googlenet)

    print('[alexnet] param num: {}'.format(num_alexnet))
    print('[googlenet] param num: {}'.format(num_googlenet))
    print('num_alexnet / num_googlenet: {:.2f}'.format(num_alexnet / num_googlenet))
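# `util.num_model` is an external helper that is not shown here. A minimal sketch of
# an equivalent parameter counter (the function name is an assumption) could be:
def num_model(model):
    # Total number of elements across all parameter tensors of the model.
    return sum(p.numel() for p in model.parameters())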
def alexnet(num_classes, num_domains=None, pretrained=True):
    """AlexNet model architecture from the
    `"One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = AlexNet()
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['alexnet']))
        print('Loaded pre-trained model')
    num_ftrs = model.classifier[-1].in_features
    model.classifier[-1] = nn.Linear(num_ftrs, num_classes)
    nn.init.xavier_uniform_(model.classifier[-1].weight, .1)
    nn.init.constant_(model.classifier[-1].bias, 0.)
    return model
def test_resnet50_UniFLOPs():
    model = AlexNet()
    input = torch.randn(1, 3, 224, 224)
    params = calParameters(model)
    flops = calGuideline(model, input)
    print("flops: ", flops, "params: ", params)
def __init__(self):
    super().__init__()
    self.features = AlexNet().features
    self.classifier = nn.Sequential(
        nn.Dropout(),
        nn.Conv2d(256, 4096, kernel_size=6),
        nn.ReLU(),
        nn.Dropout(),
        nn.Conv2d(4096, 4096, kernel_size=1),
        nn.ReLU(),
        nn.Conv2d(4096, 21, kernel_size=1),
    )
def __init__(self, alexnet_model: torchvision_models.AlexNet):
    super().__init__()
    self.normalizer = ImageNetNormalizer()
    self.model = alexnet_model.eval()

    assert len(self.model.features) == 13
    self.layer1 = nn.Sequential(self.model.features[:2])
    self.layer2 = nn.Sequential(self.model.features[2:5])
    self.layer3 = nn.Sequential(self.model.features[5:8])
    self.layer4 = nn.Sequential(self.model.features[8:10])
    self.layer5 = nn.Sequential(self.model.features[10:12])
    self.layer6 = self.model.features[12]
def get_model(device=None):
    # Load the CNN model
    model = AlexNet(num_classes=2)
    model.load_state_dict(
        torch.load('./models/best_linear_svm_alexnet_car.pth'))
    model.eval()

    # Disable gradient tracking
    for param in model.parameters():
        param.requires_grad = False
    if device:
        model = model.to(device)

    return model
def train_base(args):
    torch.manual_seed(args.seed)

    # VGG also works :-)
    # net = vgg19(num_classes=10)
    net = AlexNet(num_classes=10)

    trainset = cifar_trainset(args.local_rank)

    engine, _, dataloader, __ = deepspeed.initialize(
        args=args,
        model=net,
        model_parameters=[p for p in net.parameters() if p.requires_grad],
        training_data=trainset)

    dataloader = RepeatingLoader(dataloader)
    data_iter = iter(dataloader)

    rank = dist.get_rank()
    gas = engine.gradient_accumulation_steps()

    criterion = torch.nn.CrossEntropyLoss()

    total_steps = args.steps * engine.gradient_accumulation_steps()
    step = 0
    for micro_step in range(total_steps):
        batch = next(data_iter)
        inputs = batch[0].to(engine.device)
        labels = batch[1].to(engine.device)

        outputs = engine(inputs)
        loss = criterion(outputs, labels)
        engine.backward(loss)
        engine.step()

        if micro_step % engine.gradient_accumulation_steps() == 0:
            step += 1
            if rank == 0 and (step % 10 == 0):
                print(f'step: {step:3d} / {args.steps:3d} loss: {loss}')
class Feature_AlexNet(nn.Module):
    def __init__(self):
        super(Feature_AlexNet, self).__init__()
        self.model = AlexNet()
        self.model.load_state_dict(model_zoo.load_url(model_urls['alexnet']))
        self.fc1 = nn.Linear(256 * 6 * 6, 4096)
        self.bn1_fc = nn.BatchNorm1d(4096)
        self.fc2 = nn.Linear(4096, 2048)
        self.bn2_fc = nn.BatchNorm1d(2048)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x, reverse=False):
        x = self.model.avgpool(self.model.features(x))
        x_feat = x.view(x.size(0), 256 * 6 * 6)
        x = self.relu(self.bn1_fc(self.fc1(x_feat)))
        x = F.dropout(x, training=self.training)
        if reverse:
            # self.lambd must be set externally before calling with reverse=True.
            x = grad_reverse(x, self.lambd)
        x = self.relu(self.bn2_fc(self.fc2(x)))
        return x, x_feat
def get_model(name, input_size=None, output=None):
    def strip_zero_biases(model):
        # Drop bias terms that are identically zero (BatchNorm layers excluded).
        for n, m in model.named_modules():
            if hasattr(m, 'bias') and not isinstance(m, _BatchNorm):
                if m.bias is not None and m.bias.sum() == 0:
                    m.bias = None

    name = name.lower()
    if name == 'lenet-300-100':
        model = LeNet_300_100(input_size, output)
    elif name == 'lenet-5':
        model = LeNet(input_size, output)
    elif 'vgg' in name:
        # if 'bn' in name:
        if name == 'vgg11':
            model = vgg11(pretrained=False, num_classes=output)
        elif name == 'vgg16':
            model = vgg16(pretrained=False, num_classes=output)
        else:
            assert False
        strip_zero_biases(model)
    elif 'alexnet' in name:
        model = AlexNet(num_classes=output)
        strip_zero_biases(model)
    elif 'resnet' in name:
        if name == 'resnet20':
            model = resnet20(num_classes=output)
        elif name == 'resnet32':
            model = resnet32(num_classes=output)
        else:
            assert False
        strip_zero_biases(model)
    else:
        assert False

    return model
def alexnet(pretrained=False, **kwargs):
    # Expected input size: 224 x 224
    if pretrained:
        model = AlexNet(**kwargs)
        pretrained_state_dict = torch.load(
            './Authority/alexnet-owt-4df8aa71.pth')
        now_state_dict = model.state_dict()  # state dict of the freshly built model
        # Drop the final classifier weights so a different num_classes still loads.
        pretrained_state_dict.pop('classifier.6.weight')
        pretrained_state_dict.pop('classifier.6.bias')
        now_state_dict.update(pretrained_state_dict)
        model.load_state_dict(now_state_dict)
        return model
    return AlexNet(**kwargs)
def test_get_matching_parameter(self):
    # pick a model
    model = AlexNet()
    # define how to prune it
    schema = eval(open(currdir / "pruning_schema_alexnet.py").read())
    # prune the model according to the schema
    prune_model(model, schema, seed=0)

    correct = model.features[0].weight_orig
    retrieved = get_matching_parameter("features.0.weight", model)
    assert torch.equal(correct, retrieved)

    correct = model.features[0].weight_orig
    retrieved = get_matching_parameter("features.0.weight_orig", model)
    assert torch.equal(correct, retrieved)

    correct = model.features[3].bias
    retrieved = get_matching_parameter("features.3.bias_orig", model)
    assert torch.equal(correct, retrieved)

    with pytest.raises(KeyError):
        get_matching_parameter("blah", model)
def prepare_alexnet(alexnet: AlexNet, num_classes: int):
    alexnet.classifier[-1] = nn.Linear(4096, num_classes)
import torch
from torchvision.models import AlexNet
from torch.optim.lr_scheduler import CosineAnnealingLR
import matplotlib.pyplot as plt

model = AlexNet(num_classes=2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
scheduler = CosineAnnealingLR(optimizer, T_max=100)

plt.figure()
x = list(range(100))
y = []
for epoch in range(1, 101):
    optimizer.zero_grad()
    optimizer.step()
    print("learning rate at epoch %d: %f" % (epoch, optimizer.param_groups[0]['lr']))
    scheduler.step()
    y.append(scheduler.get_last_lr()[0])

# plot how the learning rate changes
plt.plot(x, y)
plt.xlabel("epoch")
plt.ylabel("lr")
plt.title("learning rate's curve changes as epoch goes on!")
plt.show()
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

trainset = torchvision.datasets.CIFAR10(root='./data',
                                        train=True,
                                        download=True,
                                        transform=transform)

deepspeed.init_distributed()

net = AlexNet(num_classes=10)
net = PipelineModule(layers=join_layers(net),
                     loss_fn=torch.nn.CrossEntropyLoss(),
                     num_stages=2,
                     partition_method="parameters",
                     activation_checkpoint_interval=0)

args = add_argument()

engine, optimizer, trainloader, __ = deepspeed.initialize(
    args=args,
    model=net,
    model_parameters=[p for p in net.parameters() if p.requires_grad],
    training_data=trainset)

for step in range(steps):
    loss = engine.train_batch()
import torch
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision.models import AlexNet
import matplotlib.pyplot as plt

model = AlexNet(num_classes=2)
optimizer = optim.SGD(params=model.parameters(), lr=0.01)


def f_step():
    scheduler = lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.98)
    x = list(range(100))
    y = []
    for epoch in range(100):
        scheduler.step()
        lr = scheduler.get_last_lr()[0]
        print(epoch, lr)
        y.append(lr)
    return x, y


def f_multistep():
    scheduler = lr_scheduler.MultiStepLR(optimizer, [30, 80], gamma=0.98)
    x = list(range(100))
    y = []
    for epoch in range(100):
        scheduler.step()
        lr = scheduler.get_last_lr()[0]
        print(epoch, lr)
        y.append(lr)
    return x, y
from torch import optim
from torch.optim import lr_scheduler
from torchvision.models import AlexNet
import matplotlib.pyplot as plt

from utils.lr_scheduler import CosineAnnealingWarmupRestarts


def plot(lr_list):
    f = plt.figure()
    plt.plot(lr_list)
    plt.show()


epochs = 200
iterations = 100

model = AlexNet()
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)

# scheduler = lr_scheduler.CosineAnnealingLR(optimizer, epochs, eta_min=1e-4, last_epoch=-1)
# scheduler = lr_scheduler.CosineAnnealingLR(optimizer, iterations, eta_min=1e-4, last_epoch=-1)
scheduler = CosineAnnealingWarmupRestarts(optimizer,
                                          first_cycle_steps=iterations,
                                          cycle_mult=0.5,
                                          max_lr=0.1,
                                          min_lr=0.0,
                                          warmup_steps=1,
                                          gamma=0.5)

# this zero gradient update is needed to avoid a warning message, issue #8.
optimizer.zero_grad()

lr_list = list()
for epoch in range(epochs):
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        return self.classifier(x)


model = AlexNet(10).to(device)

h1 = hl.build_graph(model, torch.zeros(64, 3, 224, 224).to(device))
h1.save('images/alexnet.png', format='png')

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


def update_lr(optimizer, lr):
    """For updating learning rate."""
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
import sys

import torch

sys.path.append('/home/wanghongwei/WorkSpace/source/tools/pytorchviz/')
from torchvision.models import AlexNet
from torchviz import make_dot_from_trace, make_dot

model = AlexNet()
x = torch.randn(1, 3, 227, 227).requires_grad_(True)

# Note: torch.onnx.set_training and torch.jit.get_trace_graph come from older
# PyTorch releases and were removed or made private in later versions.
with torch.onnx.set_training(model, False):
    trace, _ = torch.jit.get_trace_graph(model, args=(x, ))
make_dot_from_trace(trace)

# y = model(x)
# make_dot(y, params=dict(list(model.named_parameters()) + [('x', x)]))
from torch.autograd import Variable
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision as tv
import torchvision.transforms as transforms

# Saving and loading tensors
a = torch.Tensor(3, 4)
a.cuda()  # .cuda() returns a copy; `a` itself stays on the CPU
torch.save(a, 'a.pth')
b = torch.load('a.pth')
c = torch.load('a.pth', map_location=lambda sto, loc: sto)  # load onto CPU

# ----------------------------------------------------------
torch.set_default_tensor_type('torch.FloatTensor')
from torchvision.models import AlexNet

model = AlexNet()
model.state_dict().keys()

# Saving and loading the model
torch.save(model.state_dict(), 'alexnet.pth')
model.load_state_dict(torch.load('alexnet.pth'))

opt = torch.optim.Adam(model.parameters(), lr=0.1)

# Saving and loading the optimizer state
torch.save(opt.state_dict(), 'opt.pth')
opt.load_state_dict(torch.load('opt.pth'))
def test_resnet50_ptflops():
    net = AlexNet()
    flops, params = get_model_complexity_info(net, (3, 224, 224),
                                              as_strings=True,
                                              print_per_layer_stat=True)
    print('Flops: ' + flops)
    print('Params: ' + params)
def test_resnet50_thop():
    model = AlexNet()
    input = torch.randn(1, 3, 224, 224)
    flops, params = profile(model, inputs=(input,))
    flops, params = clever_format([flops, params], "%.3f")
    print("flops: ", flops, "params: ", params)
def __init__(self, NUM_CLASSES):
    super(AlexNet1, self).__init__()
    self.model_name = "AlexNet1"
    self.model = AlexNet(num_classes=NUM_CLASSES)
def train():
    torch.multiprocessing.freeze_support()
    # Join the dataset root with the train/test sub-directories.
    traindir = os.path.join('./200508_cat_classification/dogs-vs-cats', 'train')
    testdir = os.path.join('./200508_cat_classification/dogs-vs-cats', 'test')

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_loader = datautil.DataLoader(TrainImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=4,
        shuffle=True,
        num_workers=4,
        pin_memory=True)
    test_loader = datautil.DataLoader(TestImageFolder(
        testdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=1,
        shuffle=False,
        num_workers=1,
        pin_memory=False)

    net = AlexNet()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = net.to(device)
    load_model(net, './alexnet.pth')

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        net = nn.DataParallel(net)

    import torch.optim as optim
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.004)

    for epoch in range(3):
        running_loss = 0.0
        correct = 0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            prediction = torch.max(outputs.data, 1)[1]
            correct += prediction.eq(labels.data.view_as(prediction)).cpu().sum()

            if i % 2000 == 1999:
                total = (i + 1) * 4
                print(f'[{epoch + 1}, {i + 1:5d}] '
                      f'loss: {running_loss / 2000:.6f} acc : {correct} / {total}')
                running_loss = 0.0

    print('Finished Training')
    save_model(net, './')
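# `load_model` and `save_model` above are project helpers that are not shown. A minimal
# sketch, assuming they simply wrap state_dict (de)serialization (the names and
# signatures below are assumptions, not the original code):
import os

import torch


def load_model(net, path):
    # Load weights into an existing module from a plain state_dict checkpoint.
    net.load_state_dict(torch.load(path, map_location='cpu'))


def save_model(net, out_dir, filename='alexnet.pth'):
    # Unwrap DataParallel (if used) so the saved keys carry no 'module.' prefix.
    state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
    torch.save(state, os.path.join(out_dir, filename))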
def load_model(model_name, classes=1000, pretrained=True, in_channels=3):
    """Load the specified model architecture (VGG variants, AlexNet, or LeNet).

    Args:
        model_name: architecture type
        classes: number of predicted classes
        pretrained: load a network pretrained on ImageNet
        in_channels: number of input channels
    """
    if pretrained:
        assert classes == 1000, "Pretrained models are provided only for ImageNet."
    kwargs = {'num_classes': classes}

    vgg_constructors = {
        'vgg11': VGG.vgg11, 'vgg13': VGG.vgg13,
        'vgg16': VGG.vgg16, 'vgg19': VGG.vgg19,
        'vgg11bn': VGG.vgg11_bn, 'vgg13bn': VGG.vgg13_bn,
        'vgg16bn': VGG.vgg16_bn, 'vgg19bn': VGG.vgg19_bn,
    }

    def replace_input_layer(net, kernel_size=3, stride=1, padding=2 if kernel_size == 11 else 1, init=True):
        # Swap the first convolution when the input has a non-RGB channel count.
        input_layer = nn.Conv2d(in_channels, 64, kernel_size=kernel_size,
                                stride=stride, padding=padding)
        if init:
            nn.init.kaiming_normal_(input_layer.weight, mode='fan_out', nonlinearity='relu')
            input_layer.bias.data.zero_()
        net.features[0] = input_layer

    if model_name in vgg_constructors:
        net = vgg_constructors[model_name](pretrained=pretrained, **kwargs)
        if in_channels != 3:
            replace_input_layer(net)
    elif model_name == 'vgg19_orig':
        net = VGG.vgg19(pretrained=False, **kwargs)
        if in_channels != 3:
            replace_input_layer(net, init=False)
        init_weights_vgg_orig(net)
    elif model_name == 'alexnet':
        net = AlexNet(pretrained=pretrained, **kwargs)
        if in_channels != 3:
            replace_input_layer(net, kernel_size=11, stride=4, padding=2)
    elif model_name == 'lenet':
        kwargs['in_channels'] = in_channels
        net = lenet(**kwargs)
    else:
        raise ValueError("Unsupported model architecture.")

    return net
                step_size_down=20, mode='triangular'):
        scheduler = CyclicLR(self.optimizer, base_lr=base_lr, max_lr=max_lr,
                             step_size_up=step_size_up,
                             step_size_down=step_size_down,
                             mode=mode)
        return scheduler

    def adjust(self, base_lr, type):
        pass


if __name__ == '__main__':
    net = AlexNet(num_classes=2)
    optimizer = SGD(net.parameters(), lr=0.0003)
    adj = AdjustLr(optimizer)
    sch1 = adj.LambdaLR_(milestone=5, gamma=0.92)

    epoches = 40
    plt.figure()
    x1 = list(range(epoches))
    y1 = list()
    lr = optimizer.param_groups[0]['lr']
    for epoch in range(epoches):
        optimizer.step()
        sch1.step(epoch)
        a = sch1.get_lr()
        print(epoch, a)
        y1.append(a)
    plt.xlabel('Epochs')
    plt.ylabel('Cross entropy Loss')
    plt.legend()
    plt.show()


def plot_data(exp_id):
    epochs, lrs, train_err, val_err, train_loss, val_loss = load_experiment(exp_id)
    plot_lr_data(epochs, lrs)
    plot_err_data(epochs, train_err, val_err)
    plot_loss_data(epochs, train_loss, val_loss)


if __name__ == '__main__':
    plot_data(8)
    exit(1)

    from torchvision.models import AlexNet
    model = AlexNet()
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    scheduler = CosineWithRestartLR(
        optimizer,
        min_lr=1e-4,
        max_lr=0.1,
        restart_interval=10,
        restart_multiplier=2,
        amplitude_decay=1
    )
    # scheduler = AdaptiveLR(
    #     optimizer,
    #     start_lr=0.01,
    #     mu=0.99,
    #     eps=0.1,
    #     last_epoch=-1