def get_new_model(args, tmp_scale=True):
    """Instantiate a fresh (untrained) network selected by ``args.model``.

    Args:
        args: namespace providing ``model`` (str) and ``num_classes`` (int).
        tmp_scale: temperature-scaling flag forwarded to the constructor.

    Returns:
        The newly constructed network.

    Raises:
        ValueError: if ``args.model`` names an unsupported architecture.
    """
    if args.model == 'resnet18':
        return resnet.ResNet18(tmp_scale=tmp_scale, num_classes=args.num_classes)
    elif args.model == 'resnet50':
        return resnet.ResNet50(tmp_scale=tmp_scale, num_classes=args.num_classes)
    elif args.model == 'resnet101':
        return resnet.ResNet101(tmp_scale=tmp_scale, num_classes=args.num_classes)
    elif args.model == 'inceptionv4':
        return inception.inceptionv4(tmp_scale=tmp_scale, num_classes=args.num_classes)
    elif args.model == 'densenet':
        # NOTE(review): DenseNet is built without num_classes here — confirm
        # its constructor default matches args.num_classes.
        return densenet.DenseNet(tmp_scale=tmp_scale)
    # FIX: previously fell through and returned None for unknown names,
    # deferring the failure to the first use of the "model". Fail loudly.
    raise ValueError('unsupported model: {!r}'.format(args.model))
def loadmodel(nb_class=10, img_HW=8, pretrain_model='resnet18'):
    """Build a network, optionally resume it, and return (net, criterion, optimizer).

    Args:
        nb_class: number of output classes.
        img_HW: input image height/width forwarded to the attention ResNets.
        pretrain_model: architecture selector; one of 'resnet_hist18',
            'resnet_hist50', 'resnet18', 'resnet50', or None (plain resnet50).

    Relies on module-level globals: ``args``, ``device``, ``run_start_time``.

    Raises:
        ValueError: if ``pretrain_model`` is not a recognized option.
    """
    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
               'ship', 'truck')

    # Model
    print('==> Building model..')
    if pretrain_model == "resnet_hist18":
        net = attnResNet50.resnet18(num_classes=nb_class, image_HW=img_HW)
        # net = attnResNet50.load_resnet_imagenet(model=net, modelname="resnet18")
    elif pretrain_model == "resnet_hist50":
        net = attnResNet50.resnet50(num_classes=nb_class, image_HW=img_HW,
                                    pretrained=False)
        # net = attnResNet50.load_resnet_imagenet(model=net, modelname="resnet50")
    elif pretrain_model is None:
        net = attnResNet50.resnet50(num_classes=nb_class, image_HW=img_HW)
    elif pretrain_model == "resnet18":
        net = resnet.ResNet18(n_classes=nb_class)
        # net = attnResNet50.load_resnet_imagenet(model=net, modelname="resnet18")
    elif pretrain_model == "resnet50":
        net = resnet.ResNet50(n_classes=nb_class)
        # net = attnResNet50.load_resnet_imagenet(model=net, modelname="resnet50")
    else:
        # FIX: an unknown option previously crashed later with an
        # UnboundLocalError on `net`; fail with a clear message instead.
        raise ValueError('unknown pretrain_model: {!r}'.format(pretrain_model))

    print(net)
    with torch.no_grad():
        net = net.to(device)
        attnResNet50.initialize_weights(net)
    # if device == 'cuda':
    #     net = torch.nn.DataParallel(net)
    #     cudnn.benchmark = True
    print("Compute on device")

    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
        checkpoint = torch.load('./checkpoint/ckpt' + run_start_time + '.pth')
        net.load_state_dict(checkpoint['net'])
        # NOTE(review): these are locals and are discarded on return — confirm
        # whether the caller expects them to be set as globals instead.
        best_acc = checkpoint['acc']
        start_epoch = checkpoint['epoch']

    # criterion = nn.CrossEntropyLoss()
    criterion = nn.MSELoss()
    # optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
    optimizer = optim.Adam(net.parameters(), lr=args.lr)

    return net, criterion, optimizer
def main():
    """Build (or resume) the selected model, then run the train/test loop.

    Relies on module-level globals: ``args``, plus train()/test() helpers.
    """
    if args.resume:
        if not os.path.isfile('./checkpoint/{}.pkl'.format(args.model)):
            raise ValueError('no models saved....!!!!')
        # FIX: was a Python 2 `print` statement (syntax error on Python 3).
        print('resume from checkpoint....')
        net = torch.load('./checkpoint/{}.pkl'.format(args.model))
    else:
        if args.model == 'vgg16':
            net = vgg.VGG(args.model)
        elif args.model == 'vgg19':
            net = vgg.VGG(args.model)
        elif args.model == 'resnet18':
            net = resnet.ResNet18()
        elif args.model == 'resnet34':
            net = resnet.ResNet34()
        elif args.model == 'resnet50':
            net = resnet.ResNet50()
        elif args.model == 'resnet101':
            net = resnet.ResNet101()
        elif args.model == 'resnet152':
            net = resnet.ResNet152()
        elif args.model == 'densenet121':
            net = densenet.DenseNet121()
        elif args.model == 'densenet161':
            net = densenet.DenseNet161()
        elif args.model == 'densenet169':
            net = densenet.DenseNet169()
        elif args.model == 'densenet201':
            net = densenet.DenseNet201()
        else:
            raise ValueError('model not implemented...!!')

    net.cuda(args.gpu)
    net = nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    # FIX: local was named `optim`, shadowing the torch.optim module alias
    # used elsewhere in this file.
    optimizer = torch.optim.SGD(net.parameters(), lr=args.lr, momentum=0.9,
                                weight_decay=1e-4)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100,
                                                   gamma=0.1)

    # FIX: was Python 2 `xrange`.
    for e in range(args.epoch):
        train(e, net, criterion, optimizer, lr_scheduler)
        test(e, net)
# (continuation) tail of a transforms.Compose([...]) started in a previous
# chunk: ImageNet-standard normalization for evaluation.
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Validation set from a class-per-folder tree; order preserved (no shuffle).
validset = datasets.ImageFolder(args.data_path, transform=transform_test)
valid_loader = torch.utils.data.DataLoader(validset,
                                           batch_size=args.batch_size,
                                           shuffle=False,
                                           num_workers=args.n_workers)
# Materialize ground-truth labels. NOTE(review): iterating the dataset loads
# and transforms every image once just to read labels — slow for large sets;
# validset.targets would be cheaper if available.
labels_list = [x[1] for x in validset]

# Build the architecture matching the checkpoint about to be loaded.
if args.model == 'vgg':
    model = vgg.VGG('VGG19')
elif args.model == 'resnet':
    model = resnet.ResNet50()
elif args.model == 'DenseNet121':  # NOTE(review): casing differs from the other options — confirm against the CLI choices
    model = densenet.DenseNet121()

# map_location forces the checkpoint onto CPU regardless of where it was saved.
ckpt = torch.load(args.cp_path, map_location=lambda storage, loc: storage)
try:
    model.load_state_dict(ckpt['model_state'], strict=True)
except RuntimeError as err:
    # Shape/key mismatches are reported but not fatal here.
    print("Runtime Error: {0}".format(err))
except:
    # Anything else (e.g. missing 'model_state' key) is logged and re-raised.
    print("Unexpected error:", sys.exc_info()[0])
    raise

if args.cuda:
    device = get_freer_gpu()
    model = model.cuda(device)
# Random-projection head disabled (-1 means no projection layer).
rproj_size = -1

# Build the architecture matching the checkpoint loaded below (ckpt comes
# from a previous chunk).
if args.model == 'vgg':
    model = vgg.VGG('VGG19', nh=n_hidden, n_h=hidden_size,
                    dropout_prob=dropout_prob, sm_type=softmax,
                    n_classes=n_classes, emb_size=emb_size,
                    r_proj_size=rproj_size)
elif args.model == 'resnet':
    model = resnet.ResNet50(nh=n_hidden, n_h=hidden_size,
                            dropout_prob=dropout_prob, sm_type=softmax,
                            n_classes=n_classes, emb_size=emb_size,
                            r_proj_size=rproj_size)
elif args.model == 'densenet':
    model = densenet.DenseNet121(nh=n_hidden, n_h=hidden_size,
                                 dropout_prob=dropout_prob, sm_type=softmax,
                                 n_classes=n_classes, emb_size=emb_size,
                                 r_proj_size=rproj_size)

# strict=False: prints the missing/unexpected-keys report instead of raising.
print(model.load_state_dict(ckpt['model_state'], strict=False))

if args.cuda:  # body of this branch continues beyond this chunk
import argparse  # FIX: argparse was used below but never imported

import torch  # FIX: previously only bound implicitly via `import torch.utils.data`
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.utils.data

from models import vgg, resnet, densenet

# Training settings
parser = argparse.ArgumentParser(
    description='Test architectures with dummy data')
parser.add_argument('--model',
                    choices=['vgg', 'resnet', 'densenet'],
                    default='resnet')
parser.add_argument('--nclasses', type=int, default=10, metavar='N',
                    help='number of classes')
args = parser.parse_args()

if args.model == 'vgg':
    model = vgg.VGG('VGG19', n_classes=args.nclasses)
elif args.model == 'resnet':
    model = resnet.ResNet50(n_classes=args.nclasses)
elif args.model == 'densenet':
    model = densenet.DenseNet121(n_classes=args.nclasses)

# Smoke test: forward a random batch and print the output shape.
# NOTE(review): a single input channel (3, 1, 257, 257) is unusual for these
# architectures — confirm the models accept 1-channel input.
batch = torch.rand(3, 1, 257, 257)
out = model(batch)  # __call__ (runs hooks); equivalent to .forward(batch) here
print(out.size())
def train(lr, l2, momentum, smoothing, warmup, model, emb_size, n_hidden,
          hidden_size, dropout_prob, epochs, batch_size, valid_batch_size,
          n_workers, cuda, data_path, valid_data_path, hdf_path,
          valid_hdf_path, checkpoint_path, softmax, pretrained,
          pretrained_path, max_gnorm, stats, log_dir, eval_every, ablation):
    """Hyperparameter-search objective: train `model` and return the best EER.

    Tries up to 5 training trials; returns the best end-to-end EER of the
    first successful trial, or the dummy cost 0.99 if every trial fails
    (so the outer optimizer scores failures as bad, not fatal).
    """
    args_dict = locals()  # must stay first: snapshot of the hyperparameters only

    cp_name = get_cp_name(checkpoint_path)

    # FIX: rproj_size was only assigned inside the pretrained branch, causing
    # an UnboundLocalError at model construction when pretrained_path == 'none'.
    rproj_size = -1

    if pretrained_path != 'none':
        # FIX: was args.pretrained_path (leaked the module-level `args`);
        # use the function parameter.
        print('\nLoading pretrained model from: {}\n'.format(pretrained_path))
        ckpt = torch.load(pretrained_path,
                          map_location=lambda storage, loc: storage)
        # The checkpoint dictates the discriminator topology.
        dropout_prob, n_hidden, hidden_size, emb_size = (
            ckpt['dropout_prob'], ckpt['n_hidden'], ckpt['hidden_size'],
            ckpt['emb_size'])
        if 'r_proj_size' in ckpt:
            rproj_size = ckpt['r_proj_size']
        print('\nUsing pretrained config for discriminator. Ignoring args.')
        args_dict['dropout_prob'], args_dict['n_hidden'], args_dict[
            'hidden_size'], args_dict['emb_size'] = (dropout_prob, n_hidden,
                                                     hidden_size, emb_size)

    if log_dir != 'none':
        writer = SummaryWriter(log_dir=os.path.join(log_dir, cp_name),
                               comment=model,
                               purge_step=0)
        writer.add_hparams(hparam_dict=args_dict,
                           metric_dict={'best_eer': 0.0})
    else:
        writer = None

    # Per-dataset channel statistics for input normalization.
    if stats == 'cars':
        mean, std = [0.4461, 0.4329, 0.4345], [0.2888, 0.2873, 0.2946]
    elif stats == 'cub':
        mean, std = [0.4782, 0.4925, 0.4418], [0.2330, 0.2296, 0.2647]
    elif stats == 'sop':
        mean, std = [0.5603, 0.5155, 0.4796], [0.2939, 0.2991, 0.3085]
    elif stats == 'imagenet':
        mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
    else:
        # FIX: an unknown value previously surfaced later as a NameError.
        raise ValueError('Unknown stats option: {!r}'.format(stats))

    if hdf_path != 'none':
        transform_train = transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(10),
            transforms.RandomPerspective(p=0.1),
            transforms.RandomGrayscale(p=0.1),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std)
        ])
        trainset = Loader(hdf_path, transform_train)
    else:
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(10),
            transforms.RandomPerspective(p=0.1),
            transforms.RandomGrayscale(p=0.1),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std)
        ])
        trainset = datasets.ImageFolder(data_path, transform=transform_train)

    train_loader = torch.utils.data.DataLoader(
        trainset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=n_workers,
        worker_init_fn=set_np_randomseed,
        pin_memory=True)

    if valid_hdf_path != 'none':
        transform_test = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std)
        ])
        # FIX: was args.valid_hdf_path; use the function parameter.
        validset = Loader(valid_hdf_path, transform_test)
    else:
        transform_test = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std)
        ])
        # FIX: was args.valid_data_path; use the function parameter.
        validset = datasets.ImageFolder(valid_data_path,
                                        transform=transform_test)

    valid_loader = torch.utils.data.DataLoader(validset,
                                               batch_size=valid_batch_size,
                                               shuffle=True,
                                               num_workers=n_workers,
                                               pin_memory=True)

    nclasses = trainset.n_classes if isinstance(trainset, Loader) else len(
        trainset.classes)

    if model == 'vgg':
        model_ = vgg.VGG('VGG19', nh=n_hidden, n_h=hidden_size,
                         dropout_prob=dropout_prob, sm_type=softmax,
                         n_classes=nclasses, emb_size=emb_size,
                         r_proj_size=rproj_size)
    elif model == 'resnet':
        model_ = resnet.ResNet50(nh=n_hidden, n_h=hidden_size,
                                 dropout_prob=dropout_prob, sm_type=softmax,
                                 n_classes=nclasses, emb_size=emb_size,
                                 r_proj_size=rproj_size)
    elif model == 'densenet':
        model_ = densenet.DenseNet121(nh=n_hidden, n_h=hidden_size,
                                      dropout_prob=dropout_prob,
                                      sm_type=softmax, n_classes=nclasses,
                                      emb_size=emb_size,
                                      r_proj_size=rproj_size)
    else:
        raise ValueError('Unknown model: {!r}'.format(model))

    if pretrained_path != 'none':
        # Drop the output projection: its shape depends on the source task's
        # class count and would not match nclasses here.
        if ckpt['sm_type'] == 'am_softmax':
            del (ckpt['model_state']['out_proj.w'])
        elif ckpt['sm_type'] == 'softmax':
            del (ckpt['model_state']['out_proj.w.weight'])
            del (ckpt['model_state']['out_proj.w.bias'])
        print(model_.load_state_dict(ckpt['model_state'], strict=False))
        print('\n')

    if pretrained:
        print('\nLoading pretrained encoder from torchvision\n')
        if model == 'vgg':
            model_pretrained = torchvision.models.vgg19(pretrained=True)
        elif model == 'resnet':
            model_pretrained = torchvision.models.resnet50(pretrained=True)
        elif model == 'densenet':
            model_pretrained = torchvision.models.densenet121(pretrained=True)
        # strict=False: only the overlapping encoder weights are copied.
        print(model_.load_state_dict(model_pretrained.state_dict(),
                                     strict=False))
        print('\n')

    if cuda:
        device = get_freer_gpu()
        model_ = model_.cuda(device)
        torch.backends.cudnn.benchmark = True

    optimizer = TransformerOptimizer(optim.SGD(model_.parameters(),
                                               lr=lr,
                                               momentum=momentum,
                                               weight_decay=l2,
                                               nesterov=True),
                                     lr=lr,
                                     warmup_steps=warmup)

    trainer = TrainLoop(model_, optimizer, train_loader, valid_loader,
                        max_gnorm=max_gnorm, label_smoothing=smoothing,
                        verbose=-1, cp_name=cp_name, save_cp=True,
                        checkpoint_path=checkpoint_path, ablation=ablation,
                        cuda=cuda, logger=writer)

    for i in range(5):
        print(' ')
        print('Hyperparameters:')
        print('Selected model: {}'.format(model))
        print('Embedding size: {}'.format(emb_size))
        print('Hidden layer size: {}'.format(hidden_size))
        print('Number of hidden layers: {}'.format(n_hidden))
        print('Random projection size: {}'.format(rproj_size))
        print('Dropout rate: {}'.format(dropout_prob))
        print('Batch size: {}'.format(batch_size))
        print('LR: {}'.format(lr))
        print('Momentum: {}'.format(momentum))
        print('l2: {}'.format(l2))
        print('Label smoothing: {}'.format(smoothing))
        print('Warmup iterations: {}'.format(warmup))
        print('Softmax Mode is: {}'.format(softmax))
        print('Pretrained: {}'.format(pretrained))
        print('Pretrained path: {}'.format(pretrained_path))
        print('Evaluate every {} iterations.'.format(eval_every))
        print('Ablation Mode: {}'.format(ablation))
        print(' ')
        if i > 0:
            print(' ')
            print('Trial {}'.format(i + 1))
            print(' ')
        try:
            cost = trainer.train(n_epochs=epochs,
                                 save_every=epochs + 10,
                                 eval_every=eval_every)
            print(' ')
            print('Best e2e EER in file ' + cp_name + ' was: {}'.format(cost[0]))
            print('Best cos EER in file ' + cp_name + ' was: {}'.format(cost[1]))
            print(' ')
            if log_dir != 'none':
                writer.add_hparams(hparam_dict=args_dict,
                                   metric_dict={'best_eer': cost[0]})
            return cost[0]
        except Exception:
            # FIX: was a bare `except:` that also swallowed KeyboardInterrupt.
            print("Error:", sys.exc_info())

    print('Returning dummy cost due to failures while training.')
    return 0.99
# (continuation) tail of a tuple-unpack started in a previous chunk; the
# checkpoint dictates the discriminator topology:
# ..., n_hidden, hidden_size, emb_size = ckpt['dropout_prob'], ...
    'dropout_prob'], ckpt['n_hidden'], ckpt['hidden_size'], ckpt[
        'emb_size']
print('\nUsing pretrained config for discriminator. Ignoring args.')

# Build the architecture matching the checkpoint.
if args.model == 'vgg':
    model = vgg.VGG('VGG19', nh=args.n_hidden, n_h=args.hidden_size,
                    dropout_prob=args.dropout_prob, sm_type=args.softmax,
                    n_classes=args.nclasses, emb_size=args.emb_size)
elif args.model == 'resnet':
    model = resnet.ResNet50(nh=args.n_hidden, n_h=args.hidden_size,
                            dropout_prob=args.dropout_prob,
                            sm_type=args.softmax, n_classes=args.nclasses,
                            emb_size=args.emb_size)
elif args.model == 'densenet':
    model = densenet.DenseNet121(nh=args.n_hidden, n_h=args.hidden_size,
                                 dropout_prob=args.dropout_prob,
                                 sm_type=args.softmax,
                                 n_classes=args.nclasses,
                                 emb_size=args.emb_size)

if args.pretrained_path:
    # Drop the output projection from the checkpoint: its shape depends on
    # the source task's class count. (The softmax branch continues beyond
    # this chunk — the bias deletion is not visible here.)
    if ckpt['sm_type'] == 'am_softmax':
        del (ckpt['model_state']['out_proj.w'])
    elif ckpt['sm_type'] == 'softmax':
        del (ckpt['model_state']['out_proj.w.weight'])
# (continuation) tail of a transforms.Compose([...]) started in a previous
# chunk: ImageNet-standard normalization for validation.
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

validset = datasets.ImageFolder(args.valid_data_path,
                                transform=transform_test)
# NOTE(review): shuffle=True on a validation loader is unusual — confirm the
# downstream metric does not depend on sample order.
valid_loader = torch.utils.data.DataLoader(validset,
                                           batch_size=args.valid_batch_size,
                                           shuffle=True,
                                           num_workers=args.n_workers,
                                           pin_memory=True)

if args.model == 'vgg':
    model = vgg.VGG('VGG19', sm_type=args.softmax)
elif args.model == 'resnet':
    model = resnet.ResNet50(sm_type=args.softmax)
elif args.model == 'densenet':
    model = densenet.DenseNet121(sm_type=args.softmax)

if args.cuda:
    device = get_freer_gpu()
    model = model.to(device)

optimizer = optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.l2,
                      momentum=args.momentum)

trainer = TrainLoop(model, optimizer, train_loader,
def train(lr, l2, momentum, slack, patience, model, epochs, batch_size,
          valid_batch_size, train_mode, n_workers, cuda, data_path,
          valid_data_path, checkpoint_path):
    """CIFAR-10 hyperparameter-search objective.

    Tries up to 5 training trials; returns the best cost of the first
    successful trial, or the dummy cost 0.99 if every trial fails.
    """
    cp_name = get_cp_name(checkpoint_path)

    # Standard CIFAR-10 augmentation + per-channel normalization.
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([x / 255 for x in [125.3, 123.0, 113.9]],
                             [x / 255 for x in [63.0, 62.1, 66.7]])
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([x / 255 for x in [125.3, 123.0, 113.9]],
                             [x / 255 for x in [63.0, 62.1, 66.7]])
    ])

    #trainset = Loader(data_path)
    trainset = datasets.CIFAR10(root='./data', train=True, download=True,
                                transform=transform_train)
    # NOTE(review): shuffle=False on the training loader is unusual — confirm
    # it is intentional (e.g. for reproducibility with set_np_randomseed).
    train_loader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=False, num_workers=n_workers,
        worker_init_fn=set_np_randomseed)
    #validset = Loader(valid_data_path)
    validset = datasets.CIFAR10(root='./data', train=False, download=True,
                                transform=transform_test)
    valid_loader = torch.utils.data.DataLoader(validset,
                                               batch_size=valid_batch_size,
                                               shuffle=False,
                                               num_workers=n_workers)

    if model == 'vgg':
        model_ = vgg.VGG('VGG16')
    elif model == 'resnet':
        model_ = resnet.ResNet50()
    elif model == 'densenet':
        model_ = densenet.densenet_cifar()
    else:
        # FIX: an unknown model previously crashed later with a NameError.
        raise ValueError('Unknown model: {!r}'.format(model))

    if cuda:
        device = get_freer_gpu()
        model_ = model_.cuda(device)

    optimizer = optim.SGD(model_.parameters(), lr=lr, weight_decay=l2,
                          momentum=momentum)

    trainer = TrainLoop(model_, optimizer, train_loader, valid_loader,
                        slack=slack, train_mode=train_mode,
                        patience=int(patience), verbose=-1, cp_name=cp_name,
                        save_cp=True, checkpoint_path=checkpoint_path,
                        cuda=cuda)

    for i in range(5):
        if i > 0:
            print(' ')
            print('Trial {}'.format(i + 1))
            print(' ')
        try:
            cost = trainer.train(n_epochs=epochs, save_every=epochs + 10)
            print(' ')
            print('Best cost in file ' + cp_name + 'was: {}'.format(cost))
            print(' ')
            print('With hyperparameters:')
            print('Selected model: {}'.format(model))
            print('Train mode: {}'.format(train_mode))
            print('Batch size: {}'.format(batch_size))
            print('LR: {}'.format(lr))
            print('Momentum: {}'.format(momentum))
            print('l2: {}'.format(l2))
            print('Slack: {}'.format(slack))
            print('Patience: {}'.format(patience))
            print(' ')
            return cost
        except Exception:
            # FIX: was a bare `except: pass` that silently discarded every
            # error (including KeyboardInterrupt); at least report it.
            print("Error:", sys.exc_info())

    print('Returning dummy cost due to failures while training.')
    print('With hyperparameters:')
    print('Selected model: {}'.format(model))
    print('Train mode: {}'.format(train_mode))
    print('Batch size: {}'.format(batch_size))
    print('LR: {}'.format(lr))
    print('Momentum: {}'.format(momentum))
    print('l2: {}'.format(l2))
    print('Slack: {}'.format(slack))
    print('Patience: {}'.format(patience))
    print(' ')
    return 0.99
train_loader = torch.utils.data.DataLoader(trainset,
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           num_workers=args.n_workers,
                                           worker_init_fn=set_np_randomseed,
                                           pin_memory=True)

if args.valid_hdf_path:
    transform_test = transforms.Compose([
        transforms.ToPILImage(),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    # FIX: the validation set was built with transform_train (random
    # augmentations), not the deterministic transform_test defined above.
    validset = Loader(args.valid_hdf_path, transform_test)
else:
    transform_test = transforms.Compose([
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    validset = datasets.ImageFolder(args.valid_data_path,
                                    transform=transform_test)

valid_loader = torch.utils.data.DataLoader(validset,
                                           batch_size=args.valid_batch_size,
                                           shuffle=True,
                                           num_workers=args.n_workers,
                                           pin_memory=True)

if args.model == 'vgg':
    model = vgg.VGG('VGG19', sm_type=args.softmax, n_classes=args.nclasses)
elif args.model == 'resnet':
    model = resnet.ResNet50(sm_type=args.softmax, n_classes=args.nclasses)
elif args.model == 'densenet':
    model = densenet.DenseNet121(sm_type=args.softmax,
                                 n_classes=args.nclasses)

if args.pretrained:
    print('\nLoading pretrained model\n')
    if args.model == 'vgg':
        model_pretrained = torchvision.models.vgg19(pretrained=True)
    elif args.model == 'resnet':
        model_pretrained = torchvision.models.resnet50(pretrained=True)
    elif args.model == 'densenet':
        model_pretrained = torchvision.models.densenet121(pretrained=True)
    # strict=False: copy only the overlapping encoder weights.
    print(model.load_state_dict(model_pretrained.state_dict(), strict=False))
    print('\n')
def train(lr, l2, momentum, smoothing, patience, model, emb_size, n_hidden,
          hidden_size, dropout_prob, epochs, batch_size, valid_batch_size,
          n_workers, cuda, data_path, hdf_path, valid_data_path,
          valid_hdf_path, checkpoint_path, softmax, n_classes, pretrained,
          max_gnorm, lr_factor):
    """Hyperparameter-search objective: train `model` and return the best EER.

    Tries up to 5 training trials; returns the best end-to-end EER of the
    first successful trial, or the dummy cost 0.99 if every trial fails.
    """
    cp_name = get_cp_name(checkpoint_path)

    if hdf_path != 'none':
        transform_train = transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(30),
            transforms.RandomPerspective(p=0.2),
            transforms.ColorJitter(brightness=2),
            transforms.RandomGrayscale(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        trainset = Loader(hdf_path, transform_train)
    else:
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(30),
            transforms.RandomPerspective(p=0.2),
            transforms.ColorJitter(brightness=2),
            transforms.RandomGrayscale(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        trainset = datasets.ImageFolder(data_path, transform=transform_train)

    train_loader = torch.utils.data.DataLoader(
        trainset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=n_workers,
        worker_init_fn=set_np_randomseed,
        pin_memory=True)

    if valid_hdf_path != 'none':
        transform_test = transforms.Compose([
            transforms.ToPILImage(),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        # FIX: was args.valid_hdf_path (leaked the module-level `args`);
        # use the function parameter.
        validset = Loader(valid_hdf_path, transform_test)
    else:
        transform_test = transforms.Compose([
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        # FIX: was args.valid_data_path; use the function parameter.
        validset = datasets.ImageFolder(valid_data_path,
                                        transform=transform_test)

    valid_loader = torch.utils.data.DataLoader(validset,
                                               batch_size=valid_batch_size,
                                               shuffle=True,
                                               num_workers=n_workers,
                                               pin_memory=True)

    if model == 'vgg':
        model_ = vgg.VGG('VGG19', nh=n_hidden, n_h=hidden_size,
                         dropout_prob=dropout_prob, sm_type=softmax,
                         n_classes=n_classes, emb_size=emb_size)
    elif model == 'resnet':
        model_ = resnet.ResNet50(nh=n_hidden, n_h=hidden_size,
                                 dropout_prob=dropout_prob, sm_type=softmax,
                                 n_classes=n_classes, emb_size=emb_size)
    elif model == 'densenet':
        model_ = densenet.DenseNet121(nh=n_hidden, n_h=hidden_size,
                                      dropout_prob=dropout_prob,
                                      sm_type=softmax, n_classes=n_classes,
                                      emb_size=emb_size)
    else:
        # FIX: an unknown model previously crashed later with a NameError.
        raise ValueError('Unknown model: {!r}'.format(model))

    if pretrained:
        print('\nLoading pretrained encoder from torchvision\n')
        if model == 'vgg':
            model_pretrained = torchvision.models.vgg19(pretrained=True)
        elif model == 'resnet':
            model_pretrained = torchvision.models.resnet50(pretrained=True)
        elif model == 'densenet':
            model_pretrained = torchvision.models.densenet121(pretrained=True)
        # strict=False: copy only the overlapping encoder weights.
        print(model_.load_state_dict(model_pretrained.state_dict(),
                                     strict=False))
        print('\n')

    if cuda:
        device = get_freer_gpu()
        model_ = model_.cuda(device)
        torch.backends.cudnn.benchmark = True

    optimizer = optim.SGD(model_.parameters(), lr=lr, weight_decay=l2,
                          momentum=momentum)

    trainer = TrainLoop(model_, optimizer, train_loader, valid_loader,
                        max_gnorm=max_gnorm, patience=int(patience),
                        lr_factor=lr_factor, label_smoothing=smoothing,
                        verbose=-1, cp_name=cp_name, save_cp=True,
                        checkpoint_path=checkpoint_path, cuda=cuda)

    for i in range(5):
        print(' ')
        print('Hyperparameters:')
        print('Selected model: {}'.format(model))
        print('Embedding size: {}'.format(emb_size))
        print('Hidden layer size: {}'.format(hidden_size))
        print('Number of hidden layers: {}'.format(n_hidden))
        print('Dropout rate: {}'.format(dropout_prob))
        print('Batch size: {}'.format(batch_size))
        print('LR: {}'.format(lr))
        print('Momentum: {}'.format(momentum))
        print('l2: {}'.format(l2))
        print('Label smoothing: {}'.format(smoothing))
        print('Patience: {}'.format(patience))
        print('Softmax Mode is: {}'.format(softmax))
        print('Pretrained: {}'.format(pretrained))
        print(' ')
        if i > 0:
            print(' ')
            print('Trial {}'.format(i + 1))
            print(' ')
        try:
            cost = trainer.train(n_epochs=epochs, save_every=epochs + 10)
            print(' ')
            print('Best e2e EER in file ' + cp_name + ' was: {}'.format(cost[0]))
            print('Best cos EER in file ' + cp_name + ' was: {}'.format(cost[1]))
            print(' ')
            return cost[0]
        except Exception:
            # FIX: was a bare `except:` that also swallowed KeyboardInterrupt.
            print("Error:", sys.exc_info())

    print('Returning dummy cost due to failures while training.')
    return 0.99
# (continuation) tail of a parser.add_argument(...) call started in a
# previous chunk.
    help='maximum number of frames per utterance (default: 1)')
parser.add_argument('--emb-size', type=int, default=256, metavar='N',
                    help='Embedding dimension (default: 256)')
args = parser.parse_args()

# Build the selected architecture with the CLI-provided head sizes.
if args.model == 'vgg':
    model = vgg.VGG('VGG19', nh=args.n_hidden, n_h=args.hidden_size,
                    emb_size=args.emb_size)
elif args.model == 'resnet':
    model = resnet.ResNet50(nh=args.n_hidden, n_h=args.hidden_size,
                            emb_size=args.emb_size)
elif args.model == 'densenet':
    model = densenet.DenseNet121(nh=args.n_hidden, n_h=args.hidden_size,
                                 emb_size=args.emb_size)

# Shape smoke test: forward a random 3x224x224 batch of 3 images and print
# the embedding / output / projection sizes.
batch = torch.rand(3, 3, 224, 224)
emb, out = model.forward(batch)
print(emb.size(), out.size())
out_layer = model.out_proj(out)
print(out_layer.size())
def loadCoeffIdx(pathName, modelName):
    """Load a pruned model from `pathName` and return its weight matrices.

    Each returned element is a 2-D numpy array (out_features x flattened-in)
    taken from the pruned conv/linear layers of `modelName`. For the
    transformer, the decoder layer weights (indices 36..96) are replicated
    11 more times to unroll the shared decoder stack.

    Raises:
        SystemExit: if `modelName` is not supported.
    """
    skipLinear = False
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if modelName == "vgg16_c10":
        model = vgg16.VGG16()
    elif modelName == "vgg16_c10_old":
        model = vgg16_old.VGG16()
    elif modelName == "resnet50_c10":
        model = resnet.ResNet50()
    elif modelName == "inceptionv3_c10":
        model = inception_v3_c10.inception_v3()
    elif modelName == "alexnet_in":
        #model = torch.hub.load('pytorch/vision', 'alexnet', pretrained=True)
        model = alexnet.AlexNet()
        skipLinear = True
    elif modelName == "vgg16_in":
        model = vgg_in.vgg16()
    elif modelName == "resnet50_in":
        model = resnet_in.resnet50()
    elif modelName == "transformer_wmt":
        translator = transformer_translate.create_model(
        )  # returns Transformer()
        translator = translator.to(device)
        model = translator.model
    else:
        print("Model not supported")
        exit(1)
    model = model.to(device)

    # Swap in the pruned layer wrappers appropriate to the architecture.
    if ("vgg16" in modelName or "alexnet" in modelName):
        model = replace_vgg16(model, skipLinear)
    elif "transformer" in modelName:
        #transformer_train.replace_with_pruned(model, "model", prune_attention=True, prune_only_attention=False)
        pass
    else:
        replace_with_pruned(model, "model", skipLinear)

    # ImageNet checkpoints were saved under DistributedDataParallel, so the
    # model must be wrapped before load_state_dict to match the key prefix.
    if "_in" in modelName:
        if not torch.distributed.is_initialized():
            port = np.random.randint(10000, 65536)
            torch.distributed.init_process_group(
                backend='nccl',
                init_method='tcp://127.0.0.1:%d' % port,
                rank=0,
                world_size=1)
        model = torch.nn.parallel.DistributedDataParallel(model)
    if not ("transformer" in modelName):
        model.load_state_dict(torch.load(pathName))
    model.eval()
    #prune(model, method="cascade", q=1.0)

    layers = []
    widths = [3]
    numConv = 0
    assert isinstance(model, nn.Module)
    for n, m in model.named_modules():
        print(type(m))
        if isinstance(m, torch.nn.Conv2d):
            numConv += 1
        if isinstance(m, torch.nn.Linear):
            numConv += 1
        if isinstance(m, PrunedConv):
            layer = m.conv.weight.view(
                (m.out_channels, -1)).detach().cpu().numpy()
        # FIX: `or` / `and` precedence — previously parsed as
        # `A or (B and not skipLinear)`, so skipLinear never guarded the
        # first isinstance; parenthesized to apply the guard to both.
        elif (isinstance(m, PrunedLinear) or isinstance(
                m, CSP.pruned_layers.PrunedLinear)) and (not skipLinear):
            layer = m.linear.weight.view(
                (m.out_features, -1)).detach().cpu().numpy()
        else:
            continue
        widths.append(len(layer))
        layers.append(layer)

    if "transformer" in modelName:
        # Unroll the shared decoder stack: replicate layers 36..96, 11 times.
        for i in range(11):
            for j in range(36, 97):
                layers.append(layers[j])

    return layers
def exportData(pathName, modelName):
    """Trace one forward pass of `modelName` (loaded from `pathName`) and dump
    per-layer activations, weights and shape metadata to several files
    (MODEL_PATH/model.csv, SCALE_PATH, ST_PATH/<model>.h5, EXPORT_PATH/*.npy,
    CX_PATH/*.csv, CX_PATH/ModelShape.txt).

    Collection happens via forward-pre-hooks registered on Conv2d (and, for
    the transformer, Linear) modules.
    """
    global skipLinear
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if modelName == "vgg16_c10":
        model = vgg16.VGG16()
    elif modelName == "vgg16_c10_old":
        model = vgg16_old.VGG16()
    elif modelName == "resnet50_c10":
        model = resnet.ResNet50()
    elif modelName == "inceptionv3_c10":
        model = inception_v3_c10.inception_v3()
    elif modelName == "alexnet_in":
        #model = torch.hub.load('pytorch/vision', 'alexnet', pretrained=True)
        model = alexnet.AlexNet()
        skipLinear = True
    elif modelName == "vgg16_in":
        model = vgg_in.vgg16()
        skipLinear = True
    elif modelName == "resnet50_in":
        model = resnet_in.resnet50()
    elif modelName == "transformer_wmt":
        translator = transformer_translate.create_model(
        )  # returns Transformer()
        translator = translator.to(device)
        model = translator.model
    else:
        print("Model not supported")
        exit(1)
    model = model.to(device)

    # Swap in the pruned layer wrappers appropriate to the architecture.
    if ("vgg16" in modelName or "alexnet" in modelName):
        model = replace_vgg16(model, skipLinear)
    elif "transformer" in modelName:
        #transformer_train.replace_with_pruned(model, "model", prune_attention=True, prune_only_attention=False)
        pass
    else:
        replace_with_pruned(model, "model", skipLinear)

    # ImageNet checkpoints were saved under DistributedDataParallel, so the
    # model must be wrapped before load_state_dict to match the key prefix.
    if "_in" in modelName:
        if not torch.distributed.is_initialized():
            port = np.random.randint(10000, 65536)
            torch.distributed.init_process_group(
                backend='nccl',
                init_method='tcp://127.0.0.1:%d' % port,
                rank=0,
                world_size=1)
        model = torch.nn.parallel.DistributedDataParallel(model,
                                                          device_ids=[0],
                                                          output_device=0)
    if not ("transformer" in modelName):
        model.load_state_dict(torch.load(pathName))
    model.eval()
    #prune(model, method="cascade", q=1.0)
    #layer_num = 0
    #save_dir = "/root/hostCurUser/reproduce/DNNsim/net_traces/ResNet50_ImageNet_CSP/"
    #lstModules = list( model.named_modules())
    #for n, m in model.named_modules():
    #for i in range(len(lstModules)):
    #	if isinstance(lstModules[i], nn.Conv2d) or isinstance(lstModules[i], nn.Linear):
    #		model[i] = DataExporter(lstModules[i], save_dir, layer_num)
    #		layer_num += 1

    # NOTE(review): `f` is later shadowed by the `with open(... .h5)` block
    # below, so this CSV handle is never explicitly closed and the final
    # f.close() re-closes the h5 handle instead — confirm intended.
    f = open(MODEL_PATH + "model.csv", "w")
    fscale = open(SCALE_PATH, "w")
    fscale.write(
        "Layer name, IFMAP Height, IFMAP Width, Filter Height, Filter Width, Channels, Num Filter, Strides,\n"
    )
    layer_idx = 0
    models = []  # for SparTen
    layers = []
    acts = []
    weights = []
    paddings = []
    strides = []
    idxs = []

    def extract(module, input):
        # Forward-pre-hook: record the input activation plus this module's
        # weights/shape metadata into the enclosing lists.
        #if module.extracted:
        #	return
        if len(input[0].shape) < 4:
            # Non-image input: only kept for the transformer; coerced to a
            # NCHW-like (1, in_features, 1, 1) view.
            if not ("transformer" in modelName):
                return
            try:
                a = input[0].detach().cpu().reshape(1, module.in_features, 1,
                                                    1)
            except:
                a = input[0].detach().cpu().reshape(-1, 1, 1)
                a = a[:module.in_features]
                a = a.reshape(1, module.in_features, 1, 1)
        else:
            a = input[0].detach().cpu()
        acts.append(a)
        if isinstance(module, torch.nn.Conv2d):
            #print(m.mask)
            #layer = m.mask.view((m.out_channels, -1)).detach().cpu().numpy()
            layer = module.weight.view(
                (module.out_channels, -1)).detach().cpu().numpy()
            weight = module.weight.detach().cpu().numpy()
            tp = "conv"
            stride = str(max(module.stride[0], module.stride[1]))
            padding = str(max(module.padding[0], module.padding[1]))
            in_channels = module.in_channels
            out_channels = module.out_channels
            kernel_size = module.kernel_size
            padding_st = module.padding
            stride_st = module.stride
        # NOTE(review): `skipLinearExport` is an external global distinct from
        # `skipLinear` — confirm it is defined at module level (possible typo).
        elif isinstance(module, torch.nn.Linear) and (not skipLinearExport):
            layer = module.weight.view(
                (module.out_features, -1)).detach().cpu().numpy()
            # FC weights stored as (out, in, 1, 1) to mimic conv layout.
            weight = module.weight.detach().cpu().reshape(
                module.weight.shape[0], module.weight.shape[1], 1, 1).numpy()
            tp = "fc"
            stride = str(1)
            padding = str(0)
            in_channels = module.in_features
            out_channels = module.out_features
            kernel_size = (1, 1)
            padding_st = (0, 0)
            stride_st = (1, 1)
        else:
            print("{} does not exist".format(module))
            exit(1)
        name = str(module.layer_idx)
        weights.append(weight)
        paddings.append(int(padding))
        strides.append(int(stride))
        f.write(name + "," + tp + "," + stride + "," + padding + ",\n")
        layers.append(layer)
        models.append({
            'in_channels': in_channels,
            'out_channels': out_channels,
            'kernel': kernel_size,
            'name': tp + name,
            'padding': padding_st,
            'weights': weight,
            'IFM': a.cpu().numpy(),
            'stride': stride_st
        })
        idxs.append(module.layer_idx)
        #module.extracted = True

    #replace_with_exporter(model, "model", f, fscale)
    # Register the hook on square-kernel Conv2d layers (and Linear layers for
    # the transformer), tagging each with its collection order.
    for n, m in model.named_modules():
        if isinstance(m, torch.nn.Conv2d):
            weight = m.weight.detach().cpu().numpy()
            if weight.shape[2] != weight.shape[3]:
                continue
            #weights.append(weight)
            #m.extracted = False
            m.register_forward_pre_hook(extract)
            #name = str(layer_idx)
            #tp = "conv"
            #stride = str(max(m.stride[0], m.stride[1]))
            #padding = str(max(m.padding[0], m.padding[1]))
            #paddings.append(int(padding))
            #f.write(name+"," + tp+"," + stride+"," + padding + ",\n")
            m.layer_idx = layer_idx
            layer_idx += 1
        elif isinstance(m, torch.nn.Linear):
            if not ("transformer" in modelName):
                continue
            #weight = m.weight.detach().cpu().reshape(m.weight.shape[0], m.weight.shape[1], 1, 1).numpy()
            #weights.append(weight)
            #m.extracted = False
            m.register_forward_pre_hook(extract)
            #name = str(layer_idx)
            #tp = "fc"
            #stride = str(1)
            #padding = str(0)
            #paddings.append(int(padding))
            #f.write(name+"," + tp+"," + stride+"," + padding + ",\n")
            m.layer_idx = layer_idx
            layer_idx += 1

    # One forward pass with a dataset-appropriate dummy input fires the hooks.
    if "_in" in modelName:
        IFM = torch.rand(1, 3, 224, 224).cuda()
        model(IFM)
    elif "_c10" in modelName:
        IFM = torch.rand(1, 3, 32, 32).cuda()
        model(IFM)
    elif "_wmt" in modelName:
        src_seq = [4556, 4562, 4560, 4557, 4712, 1894, 15, 4564, 4620, 0, 5]
        pred_seq = translator.translate_sentence(
            torch.LongTensor([src_seq]).to(device))
        print(pred_seq)
    else:
        print("Dataset not supported")
        exit(1)
    print(len(acts))
    print(len(weights))
    #layers = loadCoeffIdx(pathName, modelName)

    # Persist the SparTen layer descriptors (shadows the model.csv handle `f`).
    with open(ST_PATH + modelName + ".h5", "wb") as f:
        pickle.dump(models, f)

    sq_ptrs = []
    out_channels = []
    for layer in layers:
        sp, oc = squeezeCoeffIdxTotal(layer, len(layer))
        out_channels.append(oc)
    #"""
    i = 0
    for idx in range(len(acts)):
        x_save = acts[idx]
        weight_save = weights[idx]
        np.save(EXPORT_PATH + "act-" + str(idx) + "-0.npy", x_save)
        np.save(EXPORT_PATH + "wgt-" + str(idx) + ".npy", weight_save)
        # Binarize in place (nonzero -> 1) for the sparsity-mask CSVs below.
        x_save[x_save == 0] = int(0)
        x_save[x_save != 0] = int(1)
        np.savetxt(CX_PATH + "act" + str(idx) + ".csv",
                   x_save.reshape(-1),
                   delimiter=",",
                   fmt="%d")
        weight_save[weight_save == 0] = int(0)
        weight_save[weight_save != 0] = int(1)
        np.savetxt(CX_PATH + "Conv2D_" + str(idx) + ".csv",
                   weight_save.reshape(weight_save.shape[0], -1),
                   delimiter=",",
                   fmt="%d")
        # Spatial activation => conv layer; 1x1 activation => FC layer.
        if x_save.shape[2] > 1:
            name = "Conv" + str(idx)  #str(idxs[idx])
            IFM_height = str(x_save.shape[2] + (2 * paddings[idx]))
            IFM_width = str(x_save.shape[3] + (2 * paddings[idx]))
            filt_height = str(weight_save.shape[2])
            filt_width = str(weight_save.shape[3])
        else:
            name = "FC" + str(idx)  #str(idxs[idx])
            IFM_height = str(1)
            IFM_width = str(1)
            filt_height = str(1)
            filt_width = str(1)
        channels = str(weight_save.shape[1])
        # ResNet50 downsample/shortcut layers (idx 4/15/29/49) report the raw
        # filter count instead of the squeezed out_channels.
        if ("resnet50" in modelName) and (idx == 4 or idx == 15 or idx == 29
                                          or idx == 49):
            num_filt = str(weight_save.shape[0])
        else:
            num_filt = str(out_channels[i])
            # NOTE(review): placement of this increment (inside the else) was
            # inferred from the collapsed source — confirm `i` should only
            # advance when a squeezed count is consumed.
            i += 1
        fscale.write(name + ',\t' + IFM_height + ',\t' + IFM_width + ',\t' +
                     filt_height + ',\t' + filt_width + ',\t' + channels +
                     ',\t' + num_filt + ',\t' + str(strides[idx]) + ',\n')
    fscale.close()
    f.close()
    #"""

    # Write the ModelShape summary consumed downstream.
    cxf = open(CX_PATH + "ModelShape.txt", "w")
    cxf.write("LayerName\tLayerID\tInputShape\tOutputShape\tKernelShape\n")
    cx_layers = []
    layer_idx = 0
    for n, m in model.named_modules():
        if isinstance(m, torch.nn.Conv2d):
            if m.weight.shape[2] != m.weight.shape[3]:
                continue
            curr = "Conv\t" + str(layer_idx) + "\t" + str(
                tuple(acts[layer_idx].shape)).replace('(', '').replace(
                    ')', '').replace(' ', '') + "\t" + str(
                        tuple(acts[layer_idx].shape)).replace('(', '').replace(
                            ')', '').replace(' ', '') + "\t" + str(
                                tuple(
                                    m.weight.shape)).replace('(', '').replace(
                                        ')', '').replace(' ', '') + "\n"
            cxf.write(curr)
            cx_layers.append(curr)
            layer_idx += 1
        # NOTE(review): this branch indexes idxs[layer_idx] while the Conv
        # branch uses layer_idx directly — confirm the asymmetry is intended.
        if isinstance(m, torch.nn.Linear) and (not skipLinear):
            curr = "FC\t" + str(idxs[layer_idx]) + "\t" + str(
                tuple(acts[layer_idx].shape)).replace('(', '').replace(
                    ')', '').replace(' ', '') + "\t" + str(
                        tuple(acts[layer_idx].shape)).replace('(', '').replace(
                            ')', '').replace(' ', '') + "\t" + str(
                                tuple(
                                    m.weight.shape)).replace('(', '').replace(
                                        ')', '').replace(' ', '') + ",1,1\n"
            cxf.write(curr)
            cx_layers.append(curr)
            layer_idx += 1
    if "transformer" in modelName:
        # Unroll the shared decoder stack: replicate entries 36..96, 11 times.
        for i in range(11):
            for j in range(36, 97):
                cxf.write(cx_layers[j])
    cxf.close()
    return
# (continuation) tail of train_datagen.flow_from_directory(...) started in a
# previous chunk.
    batch_size=batch_image,
    class_mode='binary',
    color_mode='grayscale')
test_generator = test_datagen.flow_from_directory(
    test,
    target_size=(input_shape[0], input_shape[1]),
    batch_size=batch_image,
    class_mode='binary',
    color_mode='grayscale')

# Check Shape of a Batch
batch_x, batch_y = train_generator.next()
print(batch_x.shape)

# MODEL
model = resnet.ResNet50(input_shape=input_shape, num_classes=2)
print(model.summary())
# NOTE(review): sparse_categorical_crossentropy with class_mode='binary'
# works only because labels are 0/1 ints and num_classes=2 — confirm; also
# the top-k metric is k=2 despite being named 'top5_acc' (trivially 1.0 with
# 2 classes).
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss='sparse_categorical_crossentropy',
              metrics=[
                  tf.metrics.SparseCategoricalAccuracy(name='acc'),
                  tf.metrics.SparseTopKCategoricalAccuracy(k=2,
                                                           name='top5_acc')
              ])

# Use the callback we created
metrics_to_print = collections.OrderedDict([('loss', 'loss'),
                                            ('v-loss', 'val_loss'),
                                            ('acc', 'acc'),
                                            ('v-acc', 'val_acc'),