def print_and_save_arguments(arguments, save_dir): table = BeautifulTable() for a in arguments: table.append_row([a, arguments[a]]) ## TODO: Remove this ugly bit from the code # Get densenet configuration depth = arguments['depth'] if (depth - 4) % 3: raise Exception('Invalid depth') block_config = [(depth - 4) // 6 for _ in range(3)] model = DenseNet( growth_rate=arguments['growth_rate'], block_config=block_config, num_classes=10, small_inputs=True, efficient=False, ) table.append_row( ["Param_per_model (M)", count_parameters(model) / 1000000.0]) table.append_row(["CUDA", torch.cuda.is_available()]) print(table) with open(os.path.join(save_dir, "config.txt"), 'a') as f: f.write(str(table)) return
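# count_parameters is used in several of these snippets but never defined here.
# A minimal sketch of the assumed helper, shown for reference only (the real
# repositories presumably import their own version): the usual sum over
# trainable parameter tensors.
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)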
def run(args, use_cuda, output_dir): trial_list = list(range(args.n_trials)) np.random.shuffle(trial_list) for trial_i in trial_list: trial_dir = os.path.join(output_dir, 'trial_{}'.format(trial_i)) os.makedirs(trial_dir, exist_ok=True) loaders, params = get_dataloaders(args.batch_size, trial_i, args.dataset, args.augment_data, early_stop=args.early_stop) if args.network_type == 'fc': model = DenseModel(input_dim=np.prod(params['input_shape']), output_dim=params['output_dim'], hidden_nodes=args.hidden_nodes, num_modules=args.n_modules, activation=args.activation) elif args.network_type == 'conv': model = ConvModel(input_shape=params['input_shape'], output_dim=params['output_dim'], num_filters=args.filters, kernel_sizes=args.kernels, strides=args.strides, dilations=args.dilations, num_modules=args.n_modules, activation=args.activation, final_layer=args.conv_final_layer) elif args.network_type == 'densenet': model = DenseNet(input_shape=params['input_shape'], output_dim=params['output_dim'], growth_rate=args.densenet_k, depth=args.densenet_depth, reduction=args.densenet_reduction, bottleneck=args.densenet_bottleneck, num_modules=args.n_modules) logging.debug(args) logging.debug('Parameters: {}'.format(model.n_parameters())) device = torch.device("cuda" if use_cuda else "cpu") model = model.to(device) model.reset_parameters() weight_path = os.path.join(trial_dir, 'initial_weights.pt') torch.save(model.state_dict(), weight_path) for lambda_i, (lambda_, learning_rate) in enumerate( zip(args.lambda_values, args.learning_rates)): model.load_state_dict(torch.load(weight_path)) lambda_dir = os.path.join(trial_dir, str(lambda_)) os.makedirs(lambda_dir, exist_ok=True) do_lambda_value(model, lambda_, learning_rate, args, loaders, params['distribution'], device, lambda_dir)
def demo(save, depth=100, growth_rate=12, efficient=True, valid_size=5000,
         n_epochs=300, batch_size=64, seed=None):
    """
    A demo to show off training of efficient DenseNets.
    Trains and evaluates a DenseNet-BC on a custom 4-class dataset loaded via genconfig().

    Args:
        save (str) - path to save the model to (default /tmp)
        depth (int) - depth of the network (number of convolution layers) (default 100)
        growth_rate (int) - number of features added per DenseNet layer (default 12)
        efficient (bool) - use the memory efficient implementation? (default True)
        valid_size (int) - size of validation set
        n_epochs (int) - number of epochs for training (default 300)
        batch_size (int) - size of minibatch (default 64)
        seed (int) - manually set the random seed (default None)
    """
    # Get densenet configuration
    if (depth - 4) % 3:
        raise Exception('Invalid depth')
    block_config = [(depth - 4) // 6 for _ in range(3)]

    # Load the custom dataset
    tr_dt, tr_lb, te_dt, te_lb = genconfig()
    tr_set = dataset(tr_dt, tr_lb)
    te_set = dataset(te_dt, te_lb)

    if valid_size:
        indices = torch.randperm(len(tr_set))
        train_indices = indices[:len(indices) - valid_size]
        valid_indices = indices[len(indices) - valid_size:]
        train_set = torch.utils.data.Subset(tr_set, train_indices)
        valid_set = torch.utils.data.Subset(tr_set, valid_indices)
    else:
        train_set = tr_set
        valid_set = None

    # Model
    model = DenseNet(
        growth_rate=growth_rate,
        block_config=block_config,
        num_classes=4,
        small_inputs=True,
        efficient=efficient,
    )
    print(model)

    # Make save directory
    if not os.path.exists(save):
        os.makedirs(save)
    if not os.path.isdir(save):
        raise Exception('%s is not a dir' % save)

    # Train the model
    train(model=model, train_set=train_set, valid_set=valid_set, test_set=te_set,
          save=save, n_epochs=n_epochs, batch_size=batch_size, seed=seed)
    print('Done!')
def load_CC_ResNet(input_shape=(512, 512, 3)): return DenseNet.DenseNet(nb_dense_block=4, growth_rate=16, nb_filter=32, reduction=0.5, dropout_rate=0.0, weight_decay=0, classes=1000, weights_path=None)
def get_model(args):
    if args.model == 'mlp':
        model = MLP(num_classes=args.n_classes)
    elif args.model == 'resnet':
        model = ResNet(20, num_classes=args.n_classes)
    elif args.model == 'densenet':
        model = DenseNet(40, num_classes=args.n_classes)
    else:
        raise ValueError('Unknown model type: %s' % args.model)
    return model
def densenet_regression(x, y, hidden_dims, loss_fn=nn.MSELoss(), lr=1e-2, weight_decay=1e-4, num_iters=1000, print_every=100, device=torch.device('cuda'), verbose=True, plot=True): """Use DenseNet with linear layers for regression Returns: model: nn.Module, learned regression model """ in_dim = x.size(-1) out_dim = y.size(-1) hidden_dims = hidden_dims + [out_dim] model = DenseNet(in_dim, hidden_dims, dense=True, residual=False).to(device) optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr, weight_decay=weight_decay, amsgrad=True) for i in range(num_iters): y_pred = model(x) loss = loss_fn(y_pred, y) optimizer.zero_grad() loss.backward() optimizer.step() if verbose and (i % print_every == 0): print(i, loss.item()) if plot: plt.plot(x.detach().cpu().numpy().reshape(-1), y.detach().cpu().numpy().reshape(-1), 'ro', x.detach().cpu().numpy().reshape(-1), y_pred.detach().cpu().numpy().reshape(-1), 'g--') plt.show() return model
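# A minimal usage sketch for densenet_regression on synthetic 1-D data. It relies
# on the fully-connected DenseNet constructor used above; the sample count, hidden
# sizes, and CPU device are illustrative choices, not values from the original code.
import torch

x = torch.linspace(-1, 1, 200).unsqueeze(-1)          # inputs, shape (200, 1)
y = torch.sin(3 * x) + 0.1 * torch.randn_like(x)       # noisy regression target, shape (200, 1)
fitted = densenet_regression(x, y,
                             hidden_dims=[32, 32],
                             num_iters=500,
                             device=torch.device('cpu'),
                             plot=False)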
class LineFilter: def __init__(self): self.model = DenseNet(growth_rate=8, block_config=(2, 2, 2), bn_size=4, drop_rate=0, num_init_features=8 * 2, small_inputs=True, efficient=True) self.model.eval() self.model.load_state_dict( torch.load("save/param_best.pth", map_location=lambda storage, loc: storage)) summary(self.model, input_size=(3, 480, 640)) def predict(self, input_data): output = self.model(input_data).squeeze() output[output > 255] = 255 output[output < 150] = 0 output = output.detach().numpy() return output.astype(dtype=np.uint8)
def count_densenet_param(growth_rate, depth):
    if (depth - 4) % 3:
        raise Exception('Invalid depth')
    block_config = [(depth - 4) // 6 for _ in range(3)]
    model = DenseNet(
        growth_rate=growth_rate,
        block_config=block_config,
        num_classes=10,
        small_inputs=True,
    )
    return count_parameters(model)
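# The (depth - 4) % 3 check and block_config computation above recur throughout
# these snippets. A small sketch of that DenseNet-BC arithmetic (the helper name
# is illustrative, not from the original code): with three dense blocks of
# bottleneck layers, depth = 6 * n + 4, so each block holds (depth - 4) // 6 layers.
def depth_to_block_config(depth, num_blocks=3):
    if (depth - 4) % 3:
        raise ValueError('Invalid depth: %d' % depth)
    layers_per_block = (depth - 4) // (2 * num_blocks)
    return tuple(layers_per_block for _ in range(num_blocks))

assert depth_to_block_config(100) == (16, 16, 16)   # DenseNet-BC-100
assert depth_to_block_config(40) == (6, 6, 6)       # DenseNet-BC-40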
def _get_model_archtecture(self):
    """
    Build the network architecture from the config file.
    :return: None (sets self.model)
    """
    if self.config['type'] == 'DenseNet':
        from models import DenseNet
        model_object = DenseNet.DenseNet(self.config['model_config'])
    elif self.config['type'] == 'ResNet':
        from models import ResNet
        model_object = ResNet.ResNet(self.config['model_config'])
    elif self.config['type'] == 'MobilenetV2':
        from models import MobileNet
        model_object = MobileNet.mobilenetV2(self.config['model_config'])
    self.model = model_object.constuct_model()
def main(): args = parse_args() cfg = cfg_from_file(args.config) print('using config: {}'.format(args.config)) data_cfg = cfg['data'] datalist = datalist_from_file(data_cfg['datalist_path']) num_train_files = len(datalist) // 5 * 4 train_dataset = IMetDataset(data_cfg['dataset_path'], datalist[:num_train_files], transform=data_cfg['train_transform']) test_dataset = IMetDataset(data_cfg['dataset_path'], datalist[num_train_files:], transform=data_cfg['test_transform']) train_dataloader = data.DataLoader(train_dataset, batch_size=data_cfg['batch_size'], shuffle=True) test_dataloader = data.DataLoader(test_dataset, batch_size=data_cfg['batch_size']) backbone_cfg = cfg['backbone'].copy() backbone_type = backbone_cfg.pop('type') if backbone_type == 'ResNet': backbone = ResNet(**backbone_cfg) elif backbone_type == 'ResNeXt': backbone = ResNeXt(**backbone_cfg) elif backbone_type == 'DenseNet': backbone = DenseNet(**backbone_cfg) classifier = Classifier(backbone, backbone.out_feat_dim).cuda() train_cfg, log_cfg = cfg['train'], cfg['log'] criterion = FocalLoss() optimizer = torch.optim.SGD(classifier.parameters(), lr=train_cfg['lr'], weight_decay=train_cfg['weight_decay'], momentum=train_cfg['momentum']) trainer = Trainer(model=classifier, train_dataloader=train_dataloader, val_dataloader=test_dataloader, criterion=criterion, optimizer=optimizer, train_cfg=train_cfg, log_cfg=log_cfg) trainer.train()
def demo(depth=58, growth_rate=12, efficient=False): # Get densenet configuration if (depth - 4) % 3: raise Exception('Invalid depth') block_config = [(depth - 4) // 6 for _ in range(3)] model = DenseNet( growth_rate=growth_rate, block_config=block_config, num_classes=4, small_inputs=True, efficient=efficient, ) model.load_state_dict(torch.load(os.path.join('./ckpt2/model.dat'))) t = list(model.state_dict()) n = len(t) w = [] for x in range(3): w.append([]) for i in range(9): w[x].append([]) for j in range(9): w[x][i].append(0) for name in model.state_dict(): if len(name) == 49 and name[37] == 'c': x, i, j = int(name[19]), int(name[32]), int(name[34]) a = abs(model.state_dict()[name]) w[x - 1][j][i - 1] = a.sum() for x in range(3): for i in range(9): mx = 0 for j in range(i, 9): mx = max(mx, w[x][i][j]) for j in range(i, 9): w[x][i][j] = w[x][i][j] / mx mask = [] for i in range(9): mask.append([]) for j in range(9): mask[i].append(j > i) ax = [] for x in range(3): sns.set() ax.append(sns.heatmap(w[x], vmin = 0, vmax = 1, cmap = 'jet', square = True, mask = mask)) ax[x].set_title('Dense Block %s' % (x + 1)) ax[x].set_xlabel('Target layer (l)', fontsize=15) ax[x].set_ylabel('Source layer (s)', fontsize=15) plt.show(ax[x])
def __init__(self, net_cfgs, test_dataloader, validate_thresh): self.test_dataloader = test_dataloader self.validate_thresh = validate_thresh self.net_list = [] for cfg in net_cfgs: backbone_cfg = cfg.copy() backbone_type = backbone_cfg.pop('type') checkpoint = backbone_cfg.pop('checkpoint') if backbone_type == 'ResNet': backbone = ResNet(**backbone_cfg) elif backbone_type == 'ResNeXt': backbone = ResNeXt(**backbone_cfg) elif backbone_type == 'DenseNet': backbone = DenseNet(**backbone_cfg) classifier = Classifier(backbone, backbone.out_feat_dim).cuda() assert os.path.exists(checkpoint) state_dict = torch.load(checkpoint) classifier.load_state_dict(state_dict['model_params']) classifier.eval() self.net_list.append(classifier)
def train(data, save, valid_size=5000, seed=None, depth=40, growth_rate=12,
          n_epochs=300, batch_size=64, lr=0.1, wd=0.0001, momentum=0.9):
    """
    A function to train a DenseNet-BC on CIFAR-100.

    Args:
        data (str) - path to directory where data should be loaded from/downloaded
            (default $DATA_DIR)
        save (str) - path to save the model to (default /tmp)
        valid_size (int) - size of validation set
        seed (int) - manually set the random seed (default None)
        depth (int) - depth of the network (number of convolution layers) (default 40)
        growth_rate (int) - number of features added per DenseNet layer (default 12)
        n_epochs (int) - number of epochs for training (default 300)
        batch_size (int) - size of minibatch (default 64)
        lr (float) - initial learning rate
        wd (float) - weight decay
        momentum (float) - momentum
    """
    if seed is not None:
        torch.manual_seed(seed)

    # Make save directory
    if not os.path.exists(save):
        os.makedirs(save)
    if not os.path.isdir(save):
        raise Exception('%s is not a dir' % save)

    # Get densenet configuration
    if (depth - 4) % 3:
        raise Exception('Invalid depth')
    block_config = [(depth - 4) // 6 for _ in range(3)]

    # Data transforms
    mean = [0.5071, 0.4867, 0.4408]
    stdv = [0.2675, 0.2565, 0.2761]
    train_transforms = tv.transforms.Compose([
        tv.transforms.RandomCrop(32, padding=4),
        tv.transforms.RandomHorizontalFlip(),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize(mean=mean, std=stdv),
    ])
    test_transforms = tv.transforms.Compose([
        tv.transforms.ToTensor(),
        tv.transforms.Normalize(mean=mean, std=stdv),
    ])

    # Split training into train and validation - needed for calibration
    #
    # IMPORTANT! We need to use the same validation set for temperature
    # scaling, so we're going to save the indices for later
    train_set = tv.datasets.CIFAR100(data, train=True, transform=train_transforms, download=True)
    valid_set = tv.datasets.CIFAR100(data, train=True, transform=test_transforms, download=False)
    indices = torch.randperm(len(train_set))
    train_indices = indices[:len(indices) - valid_size]
    valid_indices = indices[len(indices) - valid_size:] if valid_size else None

    # Make dataloaders
    train_loader = torch.utils.data.DataLoader(
        train_set, pin_memory=True, batch_size=batch_size,
        sampler=SubsetRandomSampler(train_indices))
    valid_loader = torch.utils.data.DataLoader(
        valid_set, pin_memory=True, batch_size=batch_size,
        sampler=SubsetRandomSampler(valid_indices))

    # Make model, criterion, and optimizer
    model = DenseNet(growth_rate=growth_rate, block_config=block_config, num_classes=100)

    # Wrap model if multiple gpus
    if torch.cuda.device_count() > 1:
        model_wrapper = torch.nn.DataParallel(model).cuda()
    else:
        model_wrapper = model.cuda()
    print(model_wrapper)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model_wrapper.parameters(), lr=lr, weight_decay=wd,
                          momentum=momentum, nesterov=True)
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[0.5 * n_epochs, 0.75 * n_epochs], gamma=0.1)

    # Train model
    best_error = 1
    for epoch in range(1, n_epochs + 1):
        scheduler.step()
        run_epoch(
            loader=train_loader,
            model=model_wrapper,
            criterion=criterion,
            optimizer=optimizer,
            epoch=epoch,
            n_epochs=n_epochs,
            train=True,
        )
        valid_results = run_epoch(
            loader=valid_loader,
            model=model_wrapper,
            criterion=criterion,
            optimizer=optimizer,
            epoch=epoch,
            n_epochs=n_epochs,
            train=False,
        )

        # Determine if model is the best
        _, _, valid_error = valid_results
        if valid_error[0] < best_error:
            best_error = valid_error[0]
            print('New best error: %.4f' % best_error)

            # When we save the model, we're also going to
            # include the validation indices
            torch.save(model.state_dict(), os.path.join(save, 'model.pth'))
            torch.save(valid_indices, os.path.join(save, 'valid_indices.pth'))

    print('Done!')
if __name__ == '__main__':
    """
    Train a 40-layer DenseNet-BC on CIFAR-100

    Args:
        --data (str) - path to directory where data should be loaded from/downloaded
            (default $DATA_DIR)
        --save (str) - path to save the model to (default /tmp)
        --valid_size (int) - size of validation set
        --seed (int) - manually set the random seed (default None)
    """
    data = os.path.join(path, 'data')
    save = os.path.join(path, 'model')
    block_config = [(40 - 4) // 6 for _ in range(3)]
    model = DenseNet(growth_rate=12, block_config=block_config, num_classes=100)
    params = torch.load(os.path.join(save, 'model.pth'))
    model.load_state_dict(params)
    # fire.Fire(train)
U2OS_data = RecursionDataset(os.path.join(args.data_dir, 'rxrx1.csv'), train_dir, sirna_encoder, 'train', 'U2OS', args=args) U2OS_loader = DataLoader(U2OS_data, batch_size=2, shuffle=False) loaders = [HEPG2_train_loader, HUVEC_train_loader, RPE_train_loader] est_time = get_est_time() net = None if (args.model_type == "densenet"): print("you picked densenet") net = DenseNet(len(sirnas)).to('cuda') elif (args.model_type == "kaggle"): print("you picked kaggle") net = ModelAndLoss(len(sirnas)).to('cuda') elif (args.model_type == "multitask"): print("you picked multitask") net = MultitaskNet(len(sirnas)).to('cuda') elif (args.model_type == "lr"): print("you picked lr") net = LogisticRegression(512 * 512 * 6, len(sirnas)).to('cuda') elif (args.model_type == "cnn"): print("you picked cnn") net = CNN(len(sirnas)).to('cuda') else: print("invalid model type")
def get_epoch_number(depth, growth_rate, ensemble_size, dataset, batch_size=256, num_epochs=120):
    # get data set
    train_set, _, small_inputs, num_classes, _ = data.get_dataset(dataset, "./data/")
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True,
        pin_memory=(torch.cuda.is_available()), num_workers=0)

    # single
    single = [
        DenseNet(growth_rate=growth_rate,
                 block_config=[(depth - 4) // 6 for _ in range(3)],
                 num_classes=num_classes,
                 small_inputs=small_inputs,
                 efficient=True,
                 compression=1.0)
    ]

    # vertical (the list repeats one shared instance, which is enough for timing)
    ensemble_depth, _ = get_ensemble_depth(depth, growth_rate, ensemble_size)
    vertical = [
        DenseNet(growth_rate=growth_rate,
                 block_config=[(ensemble_depth - 4) // 6 for _ in range(3)],
                 num_classes=num_classes,
                 small_inputs=small_inputs,
                 efficient=True,
                 compression=1.0)
    ] * ensemble_size

    # horizontal
    ensemble_growth_rate, _ = get_ensemble_growth_rate(depth, growth_rate, ensemble_size)
    horizontal = [
        DenseNet(growth_rate=ensemble_growth_rate,
                 block_config=[(depth - 4) // 6 for _ in range(3)],
                 num_classes=num_classes,
                 small_inputs=small_inputs,
                 efficient=True,
                 compression=1.0)
    ] * ensemble_size

    # First call only warms the cache; the measurement is repeated below.
    single_epoch_time = estimate_epoch_time(single, train_loader)
    vertical_epoch_time = estimate_epoch_time(vertical, train_loader)
    single_epoch_time = estimate_epoch_time(single, train_loader)
    horizontal_epoch_time = estimate_epoch_time(horizontal, train_loader)
    print("single: ", single_epoch_time)
    print("vertical: ", vertical_epoch_time)
    print("horizontal: ", horizontal_epoch_time)

    # Scale the epoch budget so every configuration gets the same wall-clock time.
    max_epoch_time = max(single_epoch_time, vertical_epoch_time, horizontal_epoch_time)
    single_epochs = (max_epoch_time / single_epoch_time) * num_epochs
    vertical_epochs = (max_epoch_time / vertical_epoch_time) * num_epochs
    horizontal_epochs = (max_epoch_time / horizontal_epoch_time) * num_epochs
    return single_epochs, vertical_epochs, horizontal_epochs
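# Worked example of the budget-equalising arithmetic in get_epoch_number, with
# purely illustrative per-epoch timings in seconds (not measured values): each
# configuration trains for (max_epoch_time / its_epoch_time) * num_epochs epochs.
single_t, vertical_t, horizontal_t, num_epochs = 100.0, 60.0, 80.0, 120
max_t = max(single_t, vertical_t, horizontal_t)              # 100.0
print((max_t / single_t) * num_epochs)       # 120.0 epochs for the single model
print((max_t / vertical_t) * num_epochs)     # 200.0 epochs for the vertical ensemble
print((max_t / horizontal_t) * num_epochs)   # 150.0 epochs for the horizontal ensemble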
def main(args): # Fix seeds molgrid.set_random_seed(args.seed) torch.manual_seed(args.seed) np.random.seed(args.seed) # Set CuDNN options for reproducibility torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False # Set up libmolgrid e = molgrid.ExampleProvider(data_root=args.data_root, balanced=True, shuffle=True) e.populate(args.train_file) gmaker = molgrid.GridMaker() dims = gmaker.grid_dimensions(e.num_types()) tensor_shape = (args.batch_size, ) + dims # Construct input tensors input_tensor = torch.zeros(tensor_shape, dtype=torch.float32, device='cuda') float_labels = torch.zeros(args.batch_size, dtype=torch.float32) # Initialise network - Two models currently available (see models.py for details) if args.model == 'Ragoza': model = Basic_CNN(dims).to('cuda') elif args.model == 'Imrie': model = DenseNet(dims, block_config=(4, 4, 4)).to('cuda') else: print("Please specify a valid architecture") exit() # Set weights for network if args.weights: model.load_state_dict(torch.load(args.weights)) print("Loaded model parameters") else: model.apply(weights_init) print("Randomly initialised model parameters") # Print number of parameters in model print("Number of model params: %dK" % (sum([x.nelement() for x in model.parameters()]) / 1000)) # Train network # Construct optimizer optimizer = optim.SGD(model.parameters(), lr=args.base_lr, momentum=args.momentum, weight_decay=args.weight_decay) scheduler = lr_scheduler.ExponentialLR(optimizer, args.anneal_rate) print("Initial learning rate: %.6f" % scheduler.get_lr()[0]) # Train loop losses = [] for it in range(1, args.iterations + 1): # Load data batch = e.next_batch(args.batch_size) gmaker.forward(batch, input_tensor, random_rotation=args.rotate, random_translation=args.translate) batch.extract_label(0, float_labels) labels = float_labels.long().to('cuda') # Train optimizer.zero_grad() output = model(input_tensor) loss = F.cross_entropy(output, labels) loss.backward() nn.utils.clip_grad_norm_(model.parameters(), args.clip_gradients) optimizer.step() losses.append(float(loss)) # Anneal learning rate if it % args.anneal_iter == 0: scheduler.step() print("Current iteration: %d, Annealing learning rate: %.6f" % (it, scheduler.get_lr()[0])) # Progress if it % args.display_iter == 0: print("Current iteration: %d, Loss: %.3f" % (it, float(np.mean(losses[-args.display_iter:])))) # Save model if it % args.save_iter == 0: print("Saving model after %d iterations." 
% it) torch.save( model.state_dict(), args.save_dir + "/" + args.save_prefix + ".iter-" + str(it)) # Test model if args.test_file != '' and it % args.test_iter == 0: # Set to test mode model.eval() predictions = [] labs = [] e_test = molgrid.ExampleProvider(data_root=args.data_root, balanced=False, shuffle=False) e_test.populate(args.test_file) num_samples = e_test.size() num_batches = -(-num_samples // args.batch_size) for _ in range(num_batches): # Load data batch = e_test.next_batch(args.batch_size) batch_predictions = [] batch.extract_label(0, float_labels) labs.extend(list(float_labels.detach().cpu().numpy())) for _ in range(args.num_rotate): gmaker.forward(batch, input_tensor, random_rotation=args.rotate, random_translation=0.0) # Predict output = F.softmax(model(input_tensor), dim=1) batch_predictions.append( list(output.detach().cpu().numpy()[:, 0])) predictions.extend(list(np.mean(batch_predictions, axis=0))) # Print performance labs = labs[:num_samples] predictions = predictions[:num_samples] print("Current iter: %d, AUC: %.2f" % (it, roc_auc_score(labs, predictions)), flush=True) # Set to train mode model.train()
batch_size=args.batch_size, shuffle=True, drop_last=True, worker_init_fn=None, **kwargs) test_loader = torch.utils.data.DataLoader(globals()[args.dataset]( root=args.data, transform=test_transform, train=False), batch_size=args.test_batch_size, shuffle=False, **kwargs) num_classes = {"CIFAR10": 10, "CIFAR100": 100, "ImageNet": 1000} input_size = args.dataset == 'ImageNet' and 224 or 32 model = DenseNet(num_init_features=args.num_init_features, block_config=args.block_config, compression=args.compression, input_size=input_size, bn_size=args.bn_size, num_classes=num_classes[args.dataset], efficient=True) print(model) if not os.path.isdir(args.checkpoints): os.mkdir(args.checkpoints) # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) if 'epoch' in checkpoint: args.start_epoch = checkpoint['epoch'] + 1
def train_model(x_trn, x_val, config, num_classes, weights, device): y_gr_val = x_val['grapheme_root'] y_vo_val = x_val['vowel_diacritic'] y_co_val = x_val['consonant_diacritic'] model_params = config.model_params train_dataset = BengaliDataset(x_trn, n_channels=model_params.n_channels, img_size=config.img_size, transforms=config.augmentation) valid_dataset = BengaliDataset(x_val, n_channels=model_params.n_channels, img_size=config.img_size, transforms=None) train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True, num_workers=3) valid_loader = DataLoader(valid_dataset, batch_size=config.batch_size, shuffle=False, num_workers=3) del train_dataset, valid_dataset gc.collect() if 'se_resnext' in model_params.model_name: model = SeNet(model_name=model_params.model_name, n_channels=model_params.n_channels, n_classes=model_params.n_classes, pretrained=model_params.pretrained).to(device) elif 'resnetd' in model_params.model_name: model = ResNetD(model_name=model_params.model_name, n_channels=model_params.n_channels, n_classes=model_params.n_classes).to(device) elif 'resne' in model_params.model_name: model = ResNet(model_name=model_params.model_name, n_channels=model_params.n_channels, n_classes=model_params.n_classes, pretrained=model_params.pretrained).to(device) elif 'densenet' in model_params.model_name: model = DenseNet(model_name=model_params.model_name, n_channels=model_params.n_channels, n_classes=model_params.n_classes, pretrained=model_params.pretrained).to(device) elif 'efficient' in model_params.model_name: model = ENet(model_name=model_params.model_name, n_channels=model_params.n_channels, n_classes=model_params.n_classes, pretrained=model_params.pretrained).to(device) if config.model_state_fname is not None: model.load_state_dict( torch.load(f'../logs/{config.model_state_fname}/weight_best.pt')) # relu_replace(model) # bn_replace(model) weights_gr = torch.from_numpy(weights['grapheme_root']).cuda() weights_vo = torch.from_numpy(weights['vowel_diacritic']).cuda() weights_co = torch.from_numpy(weights['consonant_diacritic']).cuda() if config.loss == 'CrossEntropyLoss': # criterion_gr = nn.CrossEntropyLoss(weight=weights_gr) # criterion_vo = nn.CrossEntropyLoss(weight=weights_vo) # criterion_co = nn.CrossEntropyLoss(weight=weights_co) criterion_gr = nn.CrossEntropyLoss() criterion_vo = nn.CrossEntropyLoss() criterion_co = nn.CrossEntropyLoss() elif config.loss == 'SmoothCrossEntropyLoss': criterion_gr = SmoothCrossEntropyLoss() criterion_vo = SmoothCrossEntropyLoss() criterion_co = SmoothCrossEntropyLoss() elif config.loss == 'FocalLoss': criterion_gr = FocalLoss() criterion_vo = FocalLoss() criterion_co = FocalLoss() elif config.loss == 'ClassBalancedLoss': criterion_gr = ClassBalancedLoss(samples_per_cls=weights_gr, no_of_classes=num_classes[0], loss_type='focal', beta=0.999, gamma=2.0) criterion_vo = ClassBalancedLoss(samples_per_cls=weights_vo, no_of_classes=num_classes[1], loss_type='focal', beta=0.999, gamma=2.0) criterion_co = ClassBalancedLoss(samples_per_cls=weights_co, no_of_classes=num_classes[2], loss_type='focal', beta=0.999, gamma=2.0) elif config.loss == 'OhemLoss': criterion_gr = OhemLoss(rate=1.0) criterion_vo = OhemLoss(rate=1.0) criterion_co = OhemLoss(rate=1.0) if config.optimizer.type == 'Adam': optimizer = Adam(params=model.parameters(), lr=config.optimizer.lr, amsgrad=False, weight_decay=1e-4) elif config.optimizer.type == 'SGD': optimizer = SGD(params=model.parameters(), lr=config.optimizer.lr, momentum=0.9, weight_decay=1e-4, 
nesterov=True) scheduler_flg = False if config.scheduler.type == 'cosine': scheduler_flg = True scheduler = CosineAnnealingLR(optimizer, T_max=config.scheduler.t_max, eta_min=config.scheduler.eta_min) elif config.scheduler.type == 'cosine-warmup': scheduler_flg = True scheduler = CosineAnnealingWarmUpRestarts( optimizer, T_0=config.scheduler.t_0, T_mult=config.scheduler.t_mult, eta_max=config.scheduler.eta_max, T_up=config.scheduler.t_up, gamma=config.scheduler.gamma) elif config.scheduler.type == 'step': scheduler_flg = True scheduler = StepLR(optimizer, step_size=config.scheduler.step_size, gamma=config.scheduler.gamma) elif config.scheduler.type == 'reduce': scheduler_flg = True scheduler = ReduceLROnPlateau(optimizer, factor=config.scheduler.factor, patience=config.scheduler.patience, min_lr=config.scheduler.min_lr) best_epoch = -1 best_val_score = -np.inf mb = master_bar(range(config.epochs)) train_loss_list = [] val_loss_list = [] val_score_list = [] counter = 0 for epoch in mb: start_time = time.time() model.train() avg_loss = 0. for images, labels_gr, labels_vo, labels_co in progress_bar( train_loader, parent=mb): images = Variable(images).to(device) labels_gr = Variable(labels_gr).to(device) labels_vo = Variable(labels_vo).to(device) labels_co = Variable(labels_co).to(device) if config.loss == 'OhemLoss': if epoch < config.epochs * 0.2: new_rate = 1.0 elif epoch < config.epochs * 0.4: new_rate = 0.8 elif epoch < config.epochs * 0.6: new_rate = 0.75 elif epoch < config.epochs * 0.8: new_rate = 0.7 else: new_rate = 0.6 criterion_gr.update_rate(new_rate) criterion_vo.update_rate(new_rate) criterion_co.update_rate(new_rate) r = np.random.rand() mix_params = config.augmentation.mix_params if r < mix_params.mixup: images, targets = mixup(images, labels_gr, labels_vo, labels_co, 1.0) preds_gr, preds_vo, preds_co = model(images) loss = mixup_criterion(preds_gr, preds_vo, preds_co, targets, criterion_gr, criterion_vo, criterion_co) elif r < (mix_params.mixup + mix_params.cutmix): images, targets = cutmix(images, labels_gr, labels_vo, labels_co, 1.0) preds_gr, preds_vo, preds_co = model(images) loss = cutmix_criterion(preds_gr, preds_vo, preds_co, targets, criterion_gr, criterion_vo, criterion_co) else: preds_gr, preds_vo, preds_co = model(images.float()) loss = criterion_gr(preds_gr, labels_gr) \ + criterion_vo(preds_vo, labels_vo) \ + criterion_co(preds_co, labels_co) optimizer.zero_grad() loss.backward() optimizer.step() avg_loss += loss.item() / len(train_loader) train_loss_list.append(avg_loss) model.eval() valid_gr_preds = np.zeros((len(valid_loader.dataset), num_classes[0])) valid_vo_preds = np.zeros((len(valid_loader.dataset), num_classes[1])) valid_co_preds = np.zeros((len(valid_loader.dataset), num_classes[2])) avg_val_loss = 0. 
for i, (images, labels_gr, labels_vo, labels_co) in enumerate(valid_loader): images = Variable(images).to(device) labels_gr = Variable(labels_gr).to(device) labels_vo = Variable(labels_vo).to(device) labels_co = Variable(labels_co).to(device) preds_gr, preds_vo, preds_co = model(images.float()) loss_gr = criterion_gr(preds_gr, labels_gr) loss_vo = criterion_vo(preds_vo, labels_vo) loss_co = criterion_co(preds_co, labels_co) valid_gr_preds[i * config.batch_size:( i + 1) * config.batch_size] = preds_gr.cpu().detach().numpy() valid_vo_preds[i * config.batch_size:( i + 1) * config.batch_size] = preds_vo.cpu().detach().numpy() valid_co_preds[i * config.batch_size:( i + 1) * config.batch_size] = preds_co.cpu().detach().numpy() avg_val_loss += (loss_gr.item() + loss_vo.item() + loss_co.item()) / len(valid_loader) recall_gr = recall_score(y_gr_val, np.argmax(valid_gr_preds, axis=1), average='macro') recall_vo = recall_score(y_vo_val, np.argmax(valid_vo_preds, axis=1), average='macro') recall_co = recall_score(y_co_val, np.argmax(valid_co_preds, axis=1), average='macro') val_score = np.average([recall_gr, recall_vo, recall_co], weights=[2, 1, 1]) val_loss_list.append(avg_val_loss) val_score_list.append(val_score) if scheduler_flg and config.scheduler.type != 'reduce': scheduler.step() elif scheduler_flg and config.scheduler.type == 'reduce': scheduler.step(avg_val_loss) elapsed = time.time() - start_time mb.write( f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f} avg_val_loss: {avg_val_loss:.4f} val_score: {val_score:.4f} val_gr_score: {recall_gr:.4f} val_vo_score: {recall_vo:.4f} val_co_score: {recall_co:.4f} time: {elapsed:.0f}s' ) logging.debug( f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f} avg_val_loss: {avg_val_loss:.4f} val_score: {val_score:.4f} val_gr_score: {recall_gr:.4f} val_vo_score: {recall_vo:.4f} val_co_score: {recall_co:.4f} time: {elapsed:.0f}s' ) if best_val_score < val_score: best_epoch = epoch + 1 best_val_score = val_score best_recall_gr = recall_gr best_recall_vo = recall_vo best_recall_co = recall_co best_valid_gr_preds = valid_gr_preds best_valid_vo_preds = valid_vo_preds best_valid_co_preds = valid_co_preds best_model = model.state_dict() counter = 0 counter += 1 if counter == config.early_stopping: break print('\n\n===================================\n') print(f'CV: {best_val_score}\n') print(f'BEST EPOCH: {best_epoch}') print(f'BEST RECALL GR: {best_recall_gr}') print(f'BEST RECALL VO: {best_recall_vo}') print(f'BEST RECALL CO: {best_recall_co}') logging.debug(f'\n\nCV: {best_val_score}\n') logging.debug(f'BEST EPOCH: {best_epoch}') logging.debug(f'BEST RECALL GR: {best_recall_gr}') logging.debug(f'BEST RECALL VO: {best_recall_vo}') logging.debug(f'BEST RECALL CO: {best_recall_co}\n\n') print('\n===================================\n\n') return best_model, [ best_valid_gr_preds, best_valid_vo_preds, best_valid_co_preds ], best_val_score, train_loss_list, val_loss_list, val_score_list
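# The training loop in train_model above calls mixup/cutmix helpers that are not
# shown in these snippets. A minimal single-head sketch of standard mixup (Zhang
# et al.), only to illustrate what those helpers are assumed to do; the actual
# three-head version interpolates the grapheme/vowel/consonant labels the same way.
import numpy as np
import torch
import torch.nn.functional as F

def mixup_single(images, labels, alpha=1.0):
    lam = float(np.random.beta(alpha, alpha))
    perm = torch.randperm(images.size(0), device=images.device)
    mixed = lam * images + (1.0 - lam) * images[perm]
    return mixed, (labels, labels[perm], lam)

def mixup_loss_single(preds, targets):
    labels_a, labels_b, lam = targets
    return (lam * F.cross_entropy(preds, labels_a)
            + (1.0 - lam) * F.cross_entropy(preds, labels_b))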
def demo(data, save, depth=100, growth_rate=12, efficient=True, valid_size=5000, n_epochs=300, batch_size=64, seed=None, conv_type="SPECTRAL_PARAM"): """ A demo to show off training of efficient DenseNets. Trains and evaluates a DenseNet-BC on CIFAR-10. Args: data (str) - path to directory where data should be loaded from/downloaded (default $DATA_DIR) save (str) - path to save the model to (default /tmp) depth (int) - depth of the network (number of convolution layers) (default 100) growth_rate (int) - number of features added per DenseNet layer (default 12) efficient (bool) - use the memory efficient implementation? (default True) valid_size (int) - size of validation set n_epochs (int) - number of epochs for training (default 300) batch_size (int) - size of minibatch (default 256) seed (int) - manually set the random seed (default None) conv_type (str) - the type of applied convolution: SPECTRAL_PARAM or STANDARD """ conv_type = ConvType[conv_type] # Get densenet configuration if (depth - 4) % 3: raise Exception('Invalid depth') block_config = [(depth - 4) // 6 for _ in range(3)] # Data transforms mean = [0.5071, 0.4867, 0.4408] stdv = [0.2675, 0.2565, 0.2761] train_transforms = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize(mean=mean, std=stdv), ]) test_transforms = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean=mean, std=stdv), ]) # Datasets train_set = datasets.CIFAR10(data, train=True, transform=train_transforms, download=True) test_set = datasets.CIFAR10(data, train=False, transform=test_transforms, download=True) # Models model = DenseNet( growth_rate=growth_rate, block_config=block_config, num_classes=10, small_inputs=True, efficient=efficient, conv_type=conv_type ) print(model) # Make save directory if not os.path.exists(save): os.makedirs(save) if not os.path.isdir(save): raise Exception('%s is not a dir' % save) # Train the model train(model=model, train_set=train_set, test_set=test_set, save=save, conv_type=conv_type, valid_size=valid_size, n_epochs=n_epochs, batch_size=batch_size, seed=seed) print('Done!')
# some default params dataset/architecture related train_params = train_params_cifar print("Params:") for k, v in model_params.items(): print("\t%s: %s" % (k, v)) print("Train params:") for k, v in train_params.items(): print("\t%s: %s" % (k, v)) model_params['use_Y'] = False print("Prepare training data...") data_provider = get_data_provider_by_name(model_params['dataset'], train_params) print("Initialize the model..") tf.reset_default_graph() model = DenseNet(data_provider=data_provider, **model_params) print("Loading trained model") model.load_model() print("Data provider test images: ", data_provider.test.num_examples) print("Testing...") loss, accuracy = model.test(data_provider.test, batch_size=30) print("mean cross_entropy: %f, mean accuracy: %f" % (loss, accuracy)) def labels_to_one_hot(labels, n_classes=43+1): """Convert 1D array of labels to one hot representation Args:
def demo(data='./mydata', save='./save', depth=100, growth_rate=12, efficient=True, valid_size=None, n_epochs=300, batch_size=64, seed=None): """ A demo to show off training of efficient DenseNets. Trains and evaluates a DenseNet-BC on CIFAR-10. Args: data (str) - path to directory where data should be loaded from/downloaded (default $DATA_DIR) save (str) - path to save the model to (default /tmp) depth (int) - depth of the network (number of convolution layers) (default 40) growth_rate (int) - number of features added per DenseNet layer (default 12) efficient (bool) - use the memory efficient implementation? (default True) valid_size (int) - size of validation set n_epochs (int) - number of epochs for training (default 300) batch_size (int) - size of minibatch (default 256) seed (int) - manually set the random seed (default None) """ train_txt_dir = os.path.join(data, 'train.txt') valid_txt_dir = os.path.join(data, 'valid.txt') # Get densenet configuration if (depth - 4) % 3: raise Exception('Invalid depth') block_config = [(depth - 4) // 8 for _ in range(3)] # Data transforms mean = [0.5071, 0.4867, 0.4408] stdv = [0.2675, 0.2565, 0.2761] train_transforms = transforms.Compose([ # transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize(mean=mean, std=stdv), ]) test_transforms = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean=mean, std=stdv), ]) # Datasets # train_set = datasets.CIFAR10(data, train=True, transform=train_transforms, download=True) train_set = MyDataset(train_txt_dir, transform=train_transforms) test_set = MyDataset(valid_txt_dir, transform=test_transforms) if valid_size: valid_set = datasets.CIFAR10(data, train=True, transform=test_transforms) indices = torch.randperm(len(train_set)) train_indices = indices[:len(indices) - valid_size] valid_indices = indices[len(indices) - valid_size:] train_set = torch.utils.data.Subset(train_set, train_indices) valid_set = torch.utils.data.Subset(valid_set, valid_indices) else: valid_set = None # Models model = DenseNet( growth_rate=growth_rate, block_config=block_config, num_classes=65, small_inputs=True, efficient=efficient, ) print(model) # Make save directory if not os.path.exists(save): os.makedirs(save) if not os.path.isdir(save): raise Exception('%s is not a dir' % save) # Train the model train(model=model, train_set=train_set, valid_set=valid_set, test_set=test_set, save=save, n_epochs=n_epochs, batch_size=batch_size, seed=seed) print('Done!')
def demo(data, save, depth=100, growth_rate=12, efficient=True, valid_size=5000,
         n_epochs=300, batch_size=64, seed=None):
    """
    A demo to show off training of efficient DenseNets.
    Trains and evaluates a DenseNet-BC on CIFAR-10.

    Args:
        data (str) - path to directory where data should be loaded from/downloaded
            (default $DATA_DIR)
        save (str) - path to save the model to (default /tmp)
        depth (int) - depth of the network (number of convolution layers) (default 100)
        growth_rate (int) - number of features added per DenseNet layer (default 12)
        efficient (bool) - use the memory efficient implementation? (default True)
        valid_size (int) - size of validation set
        n_epochs (int) - number of epochs for training (default 300)
        batch_size (int) - size of minibatch (default 64)
        seed (int) - manually set the random seed (default None)
    """
    # Get densenet configuration
    if (depth - 4) % 3:
        raise Exception('Invalid depth')
    block_config = [(depth - 4) // 6 for _ in range(3)]

    # Data transforms (CIFAR-10 channel statistics)
    mean = [0.49139968, 0.48215841, 0.44653091]
    stdv = [0.24703223, 0.24348513, 0.26158784]
    train_transforms = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=stdv),
    ])
    test_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=stdv),
    ])

    # Datasets
    train_set = datasets.CIFAR10(data, train=True, transform=train_transforms, download=True)
    test_set = datasets.CIFAR10(data, train=False, transform=test_transforms, download=False)
    if valid_size:
        valid_set = datasets.CIFAR10(data, train=True, transform=test_transforms)
        indices = torch.randperm(len(train_set))
        train_indices = indices[:len(indices) - valid_size]
        valid_indices = indices[len(indices) - valid_size:]
        train_set = torch.utils.data.Subset(train_set, train_indices)
        valid_set = torch.utils.data.Subset(valid_set, valid_indices)
    else:
        valid_set = None

    # Models
    model = DenseNet(
        growth_rate=growth_rate,
        block_config=block_config,
        num_init_features=growth_rate * 2,
        num_classes=10,
        small_inputs=True,
        efficient=efficient,
    )
    print(model)

    # Print number of parameters
    num_params = sum(p.numel() for p in model.parameters())
    print("Total parameters: ", num_params)

    # Make save directory
    if not os.path.exists(save):
        os.makedirs(save)
    if not os.path.isdir(save):
        raise Exception('%s is not a dir' % save)

    # Train the model
    train(model=model, train_set=train_set, valid_set=valid_set, test_set=test_set,
          save=save, n_epochs=n_epochs, batch_size=batch_size, seed=seed)
    print('Done!')
# -*- encoding:utf-8 -*- from models import DenseNet if __name__ == '__main__': cnn = DenseNet(num_blocks=1, theta=0.5) cnn.train()
# some default params dataset/architecture related train_params = get_train_params_by_name(args.dataset) model_params['data_augmentation'] = train_params['data_augmentation'] model_params['use_Y'] = train_params['use_Y'] print("Params:") for k, v in model_params.items(): print("\t%s: %s" % (k, v)) print("Train params:") for k, v in train_params.items(): print("\t%s: %s" % (k, v)) print("Prepare training data...") data_provider = get_data_provider_by_name(args.dataset, train_params) print("Initialize the model..") model = DenseNet(data_provider=data_provider, **model_params) import tensorflow as tf with tf.Session() as sess: # `sess.graph` provides access to the graph used in a `tf.Session`. writer = tf.summary.FileWriter("/tmp/log/...", sess.graph) writer.close() if args.train: print("Data provider train images: ", data_provider.train.num_examples) model.train_all_epochs(train_params) if args.test: if not args.train: model.load_model() print("Data provider test images: ", data_provider.test.num_examples) print("Testing...")
def demo(data, save, depth=40, growth_rate=12, batch_size=256):
    """
    Applies temperature scaling to a trained model.

    Takes a pretrained DenseNet-CIFAR100 model, and a validation set
    (parameterized by indices on train set).
    Applies temperature scaling, and saves a temperature scaled version.

    NB: the "save" parameter references a DIRECTORY, not a file.
    In that directory, there should be two files:
    - model.pth (model state dict)
    - valid_indices.pth (a list of indices corresponding to the validation set).

    data (str) - path to directory where data should be loaded from/downloaded
    save (str) - directory with necessary files (see above)
    """
    # Load model state dict
    model_filename = os.path.join(save, 'model.pth')
    if not os.path.exists(model_filename):
        raise RuntimeError('Cannot find file %s to load' % model_filename)
    state_dict = torch.load(model_filename)

    # Load validation indices
    valid_indices_filename = os.path.join(save, 'valid_indices.pth')
    if not os.path.exists(valid_indices_filename):
        raise RuntimeError('Cannot find file %s to load' % valid_indices_filename)
    valid_indices = torch.load(valid_indices_filename)

    # Regenerate validation set loader
    mean = [0.5071, 0.4867, 0.4408]
    stdv = [0.2675, 0.2565, 0.2761]
    test_transforms = tv.transforms.Compose([
        tv.transforms.ToTensor(),
        tv.transforms.Normalize(mean=mean, std=stdv),
    ])
    valid_set = tv.datasets.CIFAR100(data, train=True, transform=test_transforms, download=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_set, pin_memory=True, batch_size=batch_size,
        sampler=SubsetRandomSampler(valid_indices))

    # Load original model
    if (depth - 4) % 3:
        raise Exception('Invalid depth')
    block_config = [(depth - 4) // 6 for _ in range(3)]
    orig_model = DenseNet(growth_rate=growth_rate,
                          block_config=block_config,
                          num_classes=100).cuda()
    orig_model.load_state_dict(state_dict)

    # Now we're going to wrap the model with a decorator that adds temperature scaling
    model = ModelWithTemperature(orig_model)

    # Tune the model temperature, and save the results
    model.set_temperature(valid_loader)
    model_filename = os.path.join(save, 'model_with_temperature.pth')
    torch.save(model.state_dict(), model_filename)
    print('Temperature scaled model saved to %s' % model_filename)
    print('Done!')
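# A hedged sketch of how the checkpoint saved above might be reloaded for
# calibrated inference. It assumes the same DenseNet configuration and the same
# ModelWithTemperature wrapper as in the demo, and that the wrapper's forward
# pass returns temperature-scaled logits; the directory name and dummy batch are
# purely illustrative.
import os
import torch
import torch.nn.functional as F

save_dir = '/tmp/densenet_cifar100'  # illustrative: the directory passed to demo() above
block_config = [(40 - 4) // 6 for _ in range(3)]
orig_model = DenseNet(growth_rate=12, block_config=block_config, num_classes=100).cuda()
scaled_model = ModelWithTemperature(orig_model)
scaled_model.load_state_dict(torch.load(os.path.join(save_dir, 'model_with_temperature.pth')))
scaled_model.eval()

with torch.no_grad():
    dummy_batch = torch.randn(4, 3, 32, 32).cuda()        # stands in for real CIFAR-100 images
    probs = F.softmax(scaled_model(dummy_batch), dim=1)    # calibrated class probabilities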
t_backward.append(t_bp) # free memory del model return t_forward, t_backward use_cuda = True multigpus = True # set cudnn backend to benchmark config cudnn.benchmark = True # instantiate the models densenet = DenseNet(efficient=False) densnet_effi = DenseNet(efficient=True) # build dummy variables to input and output x = torch.randn(128, 3, 32, 32) y = torch.randn(128, 100) if use_cuda: densenet = densenet.cuda() densnet_effi = densnet_effi.cuda() x = x.cuda() y = y.cuda() if multigpus: densenet = nn.DataParallel(densenet, device_ids=[0, 1]) densnet_effi = nn.DataParallel(densnet_effi, device_ids=[0, 1]) # build the dict to iterate over architectures = {'densenet': densenet, 'densenet-effi': densnet_effi}
if (args.eval_set == 'holdout'): print('Evaluation Selection: U2OS (holdout)\t\t\tSize:', len(U2OS_test_data)) test_loader = U2OS_test_loader elif (args.eval_set == 'combined'): print('Evaluation Selection: HEPG2, HUVEC, RPE\t\t\tSize:', len(combined_test_data)) test_loader = combined_test_loader else: print('Evaluation Selection: INVALID') combined_test_loader = U2OS_test_loader net = None if (args.model_type == "densenet"): print("Model: densenet") net = DenseNet(74).to('cuda') elif (args.model_type == "kaggle"): print("Model: kaggle") net = ModelAndLoss(74).to('cuda') elif(args.model_type == "multitask"): print("Model: multitask") net = MultitaskNet(74).to('cuda') elif(args.model_type == "lr"): print("Model: lr") net = LogisticRegression(512*512*6, 74).to('cuda') elif(args.model_type == "cnn"): print("Model: cnn") net = CNN(74).to('cuda') else: print("invalid model type")
# some default params dataset/architecture related train_params = train_params_cifar print("Params:") for k, v in model_params.items(): print("\t%s: %s" % (k, v)) print("Train params:") for k, v in train_params.items(): print("\t%s: %s" % (k, v)) model_params['use_Y'] = True print("Prepare training data...") data_provider = get_data_provider_by_name(model_params['dataset'], train_params) print("Initialize the model..") tf.reset_default_graph() model = DenseNet(data_provider=data_provider, **model_params) print("Loading trained model") model.load_model() print("Data provider test images: ", data_provider.test.num_examples) print("Testing...") loss, accuracy = model.test(data_provider.test, batch_size=30) print("mean cross_entropy: %f, mean accuracy: %f" % (loss, accuracy)) total_prediction, y_test = model.predictions_test(data_provider.test, batch_size=100) # Plotting incorrect examples incorrectlist = [] for i in range(len(total_prediction)): #if not correctness(y_test[i],total_prediction[i]): for j in range(len(y_test[i])):