def pretrain_generator(netG, module, param, batch_size):
    outel = list(netG._modules.values())[-1].weight.shape[0]
    dwidth = min(1024, outel)
    netD = make_netD(dwidth, batch_size)
    print(f"Layer size: {outel}, G params: {param_count(netG)}, "
          f"D params: {param_count(netD)}")
    optimG = AdamW(netG.parameters(), lr=5e-4, weight_decay=1e-4)
    optimD = AdamW(netD.parameters(), lr=5e-5, weight_decay=1e-4)
    i = 0
    d_adv_meter = AverageMeter()
    while True:
        netG.zero_grad()
        netD.zero_grad()
        z = fast_randn((batch_size, 256), requires_grad=True, device=device)
        q = netG(z)

        # Train the discriminator (WGAN-GP critic).
        free_params([netD])
        freeze_params([netG])
        noise = codes_with_dropout(generate_noise(module, param, batch_size),
                                   dwidth)
        codes = codes_with_dropout(q, dwidth)
        d_real = netD(noise)
        d_fake = netD(codes)
        interp = random_interpolate(noise, codes, device=device)
        gp = calc_gradient_penalty(netD, interp, device=device)
        d_adv = d_fake.mean() - d_real.mean()
        d_loss = d_adv + 10 * gp
        d_adv_meter.update(d_adv.item())
        d_loss.backward(retain_graph=True)
        optimD.step()

        # Train the generator to fool the discriminator.
        freeze_params([netD])
        free_params([netG])
        d_fake_loss = -d_fake.mean()
        d_fake_loss.backward()
        optimG.step()

        if i % 50 == 0:
            print(d_adv_meter.avg, gp.item())
            # Stop once the critic no longer separates real from fake codes.
            if i > 2000 and d_adv_meter.avg > 0:
                break
            d_adv_meter.reset()
        i += 1
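
# `freeze_params`/`free_params`, `random_interpolate`, and
# `calc_gradient_penalty` are used above but not defined in this file.
# Minimal sketches of the standard WGAN-GP pieces (Gulrajani et al., 2017),
# assuming 2-D (batch, features) inputs; the repo's own helpers may differ.
import torch


def freeze_params(nets):
    for net in nets:
        for p in net.parameters():
            p.requires_grad = False


def free_params(nets):
    for net in nets:
        for p in net.parameters():
            p.requires_grad = True


def random_interpolate(real, fake, device='cpu'):
    """Sample points uniformly on lines between real and fake batches."""
    alpha = torch.rand(real.size(0), 1, device=device)
    interp = alpha * real + (1 - alpha) * fake
    return interp.requires_grad_(True)


def calc_gradient_penalty(netD, interp, device='cpu'):
    """Penalize deviation of the critic's gradient norm from 1."""
    d_out = netD(interp)
    grads = torch.autograd.grad(outputs=d_out, inputs=interp,
                                grad_outputs=torch.ones_like(d_out),
                                create_graph=True, retain_graph=True)[0]
    return ((grads.view(grads.size(0), -1).norm(2, dim=1) - 1) ** 2).mean()
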
def main(config):
    seed_all()
    os.makedirs('cache', exist_ok=True)
    os.makedirs(config.logdir, exist_ok=True)
    print("Logging to: %s" % config.logdir)
    src_files = sorted(glob('*.py'))
    for src_fn in src_files:
        dst_fn = os.path.join(config.logdir, src_fn)
        copyfile(src_fn, dst_fn)
    train_image_fns = sorted(glob(os.path.join(config.train_dir, '*.jpg')))
    test_image_fns = sorted(glob(os.path.join(config.test_dir, '*.jpg')))
    assert len(train_image_fns) == 3881
    assert len(test_image_fns) == 4150
    gt, label_to_int = load_gt(config.train_rle)
    int_to_label = {v: k for k, v in label_to_int.items()}

    # create folds
    np.random.shuffle(train_image_fns)
    if config.subset > 0:
        train_image_fns = train_image_fns[:config.subset]
    folds = np.arange(len(train_image_fns)) % config.num_folds
    val_image_fns = [fn for k, fn in enumerate(train_image_fns)
                     if folds[k] == config.fold]
    train_image_fns = [fn for k, fn in enumerate(train_image_fns)
                       if folds[k] != config.fold]
    if config.add_val:
        print("Training on validation set")
        train_image_fns = train_image_fns + val_image_fns[:]
        print(len(val_image_fns), len(train_image_fns))

    # TODO: drop empty images <- is this helpful?
    train_image_fns = [fn for fn in train_image_fns
                       if KuzushijiDataset.fn_to_id(fn) in gt]
    val_image_fns = [fn for fn in val_image_fns
                     if KuzushijiDataset.fn_to_id(fn) in gt]
    print("VAL: ", len(val_image_fns), val_image_fns[123])
    print("TRAIN: ", len(train_image_fns), train_image_fns[456])

    train_ds = KuzushijiDataset(train_image_fns, gt_boxes=gt,
                                label_to_int=label_to_int, augment=True)
    val_ds = KuzushijiDataset(val_image_fns, gt_boxes=gt,
                              label_to_int=label_to_int)
    if config.cache:
        train_ds.cache()
        val_ds.cache()
    val_loader = data.DataLoader(val_ds, batch_size=config.batch_size // 8,
                                 shuffle=False,
                                 num_workers=config.num_workers,
                                 pin_memory=config.pin, drop_last=False)

    model = FPNSegmentation(config.slug)
    if config.weight is not None:
        print("Loading: %s" % config.weight)
        model.load_state_dict(th.load(config.weight))
    model = model.to(config.device)

    # Exclude normalization statistics and biases from weight decay.
    no_decay = ['mean', 'std', 'bias'] + ['.bn%d.' % i for i in range(100)]
    grouped_parameters = [
        {'params': [], 'weight_decay': config.weight_decay},
        {'params': [], 'weight_decay': 0.0},
    ]
    for n, p in model.named_parameters():
        if not any(nd in n for nd in no_decay):
            # print("Decay: %s" % n)
            grouped_parameters[0]['params'].append(p)
        else:
            # print("No Decay: %s" % n)
            grouped_parameters[1]['params'].append(p)
    optimizer = AdamW(grouped_parameters, lr=config.lr)
    if config.apex:
        model, optimizer = apex.amp.initialize(model, optimizer,
                                               opt_level="O1", verbosity=0)
    updates_per_epoch = len(train_ds) // config.batch_size
    num_updates = int(config.epochs * updates_per_epoch)
    scheduler = WarmupLinearSchedule(warmup=config.warmup,
                                     t_total=num_updates)

    # training loop
    smooth = 0.1
    best_acc = 0.0
    best_fn = None
    global_step = 0
    for epoch in range(1, config.epochs + 1):
        smooth_loss = None
        smooth_accuracy = None
        model.train()
        train_loader = data.DataLoader(train_ds,
                                       batch_size=config.batch_size,
                                       shuffle=True,
                                       num_workers=config.num_workers,
                                       pin_memory=config.pin, drop_last=True)
        progress = tqdm(total=len(train_ds), smoothing=0.01)
        for i, (X, fns, hm, centers, classes) in enumerate(train_loader):
            X = X.to(config.device).float()
            hm = hm.to(config.device)
            centers = centers.to(config.device)
            classes = classes.to(config.device)
            hm_pred, classes_pred = model(X, centers=centers)
            loss = kuzushiji_loss(hm, centers, classes, hm_pred,
                                  classes_pred)
            if config.apex:
                with apex.amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            lr_this_step = None
            if (i + 1) % config.accumulation_step == 0:
                optimizer.step()
                optimizer.zero_grad()
                lr_this_step = config.lr * scheduler.get_lr(global_step,
                                                            config.warmup)
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr_this_step
                global_step += 1
            smooth_loss = loss.item() if smooth_loss is None else \
                smooth * loss.item() + (1. - smooth) * smooth_loss
            accuracy = th.mean(((th.sigmoid(hm_pred) >= 0.5) ==
                                (hm == 1)).to(th.float)).item()
            smooth_accuracy = accuracy if smooth_accuracy is None else \
                smooth * accuracy + (1. - smooth) * smooth_accuracy
            progress.set_postfix(
                ep='%d/%d' % (epoch, config.epochs),
                loss='%.4f' % smooth_loss,
                accuracy='%.4f' % smooth_accuracy,
                lr='%.6f' % (config.lr if lr_this_step is None
                             else lr_this_step))
            progress.update(len(X))

        # skip validation on most epochs
        if epoch not in [10, 20, 30, 40, 50]:
            if 1 < epoch <= 65:
                continue

        # validation loop
        model.eval()
        progress = tqdm(enumerate(val_loader), total=len(val_loader))
        hm_correct, classes_correct = 0, 0
        num_hm, num_classes = 0, 0
        with th.no_grad():
            for i, (X, fns, hm, centers, classes) in progress:
                X = X.to(config.device).float()
                hm = hm.cuda()
                centers = centers.cuda()
                classes = classes.cuda()
                hm_pred, classes_pred = model(X)
                hm_pred = th.sigmoid(hm_pred)
                classes_pred = th.nn.functional.softmax(classes_pred, 1)
                hm_cuda = hm.cuda()
                # PyTorch 1.2 has `bool`
                if hasattr(hm_cuda, 'bool'):
                    hm_cuda = hm_cuda.bool()
                hm_correct += (hm_cuda ==
                               (hm_pred >= 0.5)).float().sum().item()
                num_hm += np.prod(hm.shape)
                num_samples = len(X)
                for sample_ind in range(num_samples):
                    center_mask = centers[sample_ind, :, 0] != -1
                    per_image_letters = center_mask.sum().item()
                    if per_image_letters == 0:
                        continue
                    num_classes += per_image_letters
                    centers_per_img = centers[sample_ind][center_mask]
                    classes_per_img = classes[sample_ind][center_mask]
                    classes_per_img_pred = classes_pred[sample_ind][
                        :, centers_per_img[:, 1],
                        centers_per_img[:, 0]].argmax(0)
                    classes_correct += (classes_per_img_pred ==
                                        classes_per_img).sum().item()

        val_hm_acc = hm_correct / num_hm
        val_classes_acc = classes_correct / num_classes
        summary_str = 'f%02d-ep-%04d-val_hm_acc-%.4f-val_classes_acc-%.4f' % (
            config.fold, epoch, val_hm_acc, val_classes_acc)
        progress.write(summary_str)
        if val_classes_acc >= best_acc:
            weight_fn = os.path.join(config.logdir, summary_str + '.pth')
            progress.write("New best: %s" % weight_fn)
            th.save(model.state_dict(), weight_fn)
            best_acc = val_classes_acc
            best_fn = weight_fn
            # keep only the most recent checkpoints for this fold
            fns = sorted(glob(os.path.join(config.logdir,
                                           'f%02d-*.pth' % config.fold)))
            for fn in fns[:-config.n_keep]:
                os.remove(fn)

    # create submission
    test_ds = KuzushijiDataset(test_image_fns)
    test_loader = data.DataLoader(test_ds,
                                  batch_size=config.batch_size // 8,
                                  shuffle=False,
                                  num_workers=config.num_workers,
                                  pin_memory=False, drop_last=False)
    if best_fn is not None:
        model.load_state_dict(th.load(best_fn))
    model.eval()
    sub = create_submission(model, test_loader, int_to_label, config,
                            pred_zip=config.pred_zip)
    sub.to_csv(config.submission_fn, index=False)
    print("Wrote to: %s" % config.submission_fn)

    # create val submission
    val_fn = config.submission_fn.replace('.csv', '_VAL.csv')
    model.eval()
    sub = create_submission(model, val_loader, int_to_label, config,
                            pred_zip=config.pred_zip.replace('.zip',
                                                             '_VAL.zip'))
    sub.to_csv(val_fn, index=False)
    print("Wrote to: %s" % val_fn)
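
# `WarmupLinearSchedule` is constructed with (warmup, t_total) and queried as
# `get_lr(step, warmup)` above, returning a multiplier on the base LR rather
# than an LR itself. A minimal sketch consistent with that usage; the actual
# class used by this repo may differ in details.
class WarmupLinearSchedule:
    def __init__(self, warmup, t_total):
        self.warmup = warmup    # fraction of t_total spent warming up
        self.t_total = t_total  # total number of optimizer updates

    def get_lr(self, step, warmup=None):
        warmup = self.warmup if warmup is None else warmup
        progress = step / max(1, self.t_total)
        if progress < warmup:
            # linear warmup from 0.0 to 1.0
            return progress / max(warmup, 1e-8)
        # linear decay from 1.0 to 0.0 over the remaining steps
        return max(0.0, (1.0 - progress) / max(1.0 - warmup, 1e-8))
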
def main(config):
    seed_all()
    os.makedirs('cache', exist_ok=True)
    os.makedirs(config.logdir, exist_ok=True)
    print("Logging to: %s" % config.logdir)
    src_files = sorted(glob('*.py'))
    for src_fn in src_files:
        dst_fn = os.path.join(config.logdir, src_fn)
        copyfile(src_fn, dst_fn)
    train_image_fns = sorted(glob(os.path.join(config.train_dir,
                                               '*/*/*.dcm')))
    test_image_fns = sorted(glob(os.path.join(config.test_dir,
                                              '*/*/*.dcm')))
    # assert len(train_image_fns) == 10712
    # assert len(test_image_fns) == 1377
    gt = load_gt(config.train_rle)

    # create folds
    np.random.shuffle(train_image_fns)
    if config.subset > 0:
        train_image_fns = train_image_fns[:config.subset]
    folds = np.arange(len(train_image_fns)) % config.num_folds
    val_image_fns = [fn for k, fn in enumerate(train_image_fns)
                     if folds[k] == config.fold]
    train_image_fns = [fn for k, fn in enumerate(train_image_fns)
                       if folds[k] != config.fold]
    # remove unused files:
    # https://www.kaggle.com/c/siim-acr-pneumothorax-segmentation/discussion/98478#latest-572385  # noqa
    train_image_fns = [fn for fn in train_image_fns
                       if DicomDataset.fn_to_id(fn) in gt]
    val_image_fns = [fn for fn in val_image_fns
                     if DicomDataset.fn_to_id(fn) in gt]
    print("VAL: ", len(val_image_fns), os.path.basename(val_image_fns[0]))
    print("TRAIN: ", len(train_image_fns),
          os.path.basename(train_image_fns[0]))

    train_ds = DicomDataset(train_image_fns, gt_rles=gt, augment=True)
    val_ds = DicomDataset(val_image_fns, gt_rles=gt)
    if config.cache:
        train_ds.cache()
        val_ds.cache()
    val_loader = data.DataLoader(val_ds, batch_size=config.batch_size,
                                 shuffle=False,
                                 num_workers=config.num_workers,
                                 pin_memory=config.pin, drop_last=False)

    model = FPNSegmentation(config.slug, ema=config.ema)
    if config.weight is not None:
        print("Loading: %s" % config.weight)
        model.load_state_dict(th.load(config.weight))
    model = model.to(config.device)

    # Exclude normalization statistics and biases from weight decay.
    no_decay = ['mean', 'std', 'bias'] + ['.bn%d.' % i for i in range(100)]
    grouped_parameters = [
        {'params': [], 'weight_decay': config.weight_decay},
        {'params': [], 'weight_decay': 0.0},
    ]
    for n, p in model.named_parameters():
        if not any(nd in n for nd in no_decay):
            print("Decay: %s" % n)
            grouped_parameters[0]['params'].append(p)
        else:
            print("No Decay: %s" % n)
            grouped_parameters[1]['params'].append(p)
    optimizer = AdamW(grouped_parameters, lr=config.lr)
    if config.apex:
        model, optimizer = apex.amp.initialize(model, optimizer,
                                               opt_level="O1", verbosity=0)
    updates_per_epoch = len(train_ds) // config.batch_size
    num_updates = int(config.epochs * updates_per_epoch)
    scheduler = WarmupLinearSchedule(warmup=config.warmup,
                                     t_total=num_updates)

    # training loop
    smooth = 0.1
    best_dice = 0.0
    best_fn = None
    global_step = 0
    for epoch in range(1, config.epochs + 1):
        smooth_loss = None
        smooth_accuracy = None
        model.train()
        train_loader = data.DataLoader(train_ds,
                                       batch_size=config.batch_size,
                                       shuffle=True,
                                       num_workers=config.num_workers,
                                       pin_memory=config.pin, drop_last=True)
        progress = tqdm(total=len(train_ds), smoothing=0.01)
        for i, (X, _, y_true) in enumerate(train_loader):
            X = X.to(config.device).float()
            y_true = y_true.to(config.device)
            y_pred = model(X)
            loss = siim_loss(y_true, y_pred, weights=None)
            if config.apex:
                with apex.amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            lr_this_step = None
            if (i + 1) % config.accumulation_step == 0:
                optimizer.step()
                optimizer.zero_grad()
                lr_this_step = config.lr * scheduler.get_lr(global_step,
                                                            config.warmup)
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr_this_step
                global_step += 1
            smooth_loss = loss.item() if smooth_loss is None else \
                smooth * loss.item() + (1. - smooth) * smooth_loss
            # print((y_true >= 0.5).sum().item())
            accuracy = th.mean(((y_pred >= 0.5) ==
                                (y_true == 1)).to(th.float)).item()
            smooth_accuracy = accuracy if smooth_accuracy is None else \
                smooth * accuracy + (1. - smooth) * smooth_accuracy
            progress.set_postfix(
                ep='%d/%d' % (epoch, config.epochs),
                loss='%.4f' % smooth_loss,
                accuracy='%.4f' % smooth_accuracy,
                lr='%.6f' % (config.lr if lr_this_step is None
                             else lr_this_step))
            progress.update(len(X))

        # skip validation during the first epochs
        if epoch <= 12:
            continue

        # validation loop
        model.eval()
        thresholds = [0.1, 0.2]
        dice_coeffs = [[] for _ in range(len(thresholds))]
        progress = tqdm(enumerate(val_loader), total=len(val_loader))
        with th.no_grad():
            for i, (X, _, y_trues) in progress:
                X = X.to(config.device).float()
                y_trues = y_trues.to(config.device)
                y_preds = model(X)
                # horizontal-flip test-time augmentation
                y_preds_flip = th.flip(model(th.flip(X, (-1, ))), (-1, ))
                y_preds = 0.5 * (y_preds + y_preds_flip)
                y_trues = y_trues.cpu().numpy()
                y_preds = y_preds.cpu().numpy()
                for yt, yp in zip(y_trues, y_preds):
                    yt = (yt.squeeze() >= 0.5).astype('uint8')
                    yp = yp.squeeze()
                    for dind, threshold in enumerate(thresholds):
                        yp_ = (yp >= threshold).astype(np.uint8)
                        sc = score(yt, yp_)
                        dice_coeffs[dind].append(sc)

        # pick the threshold with the best dice on non-empty masks
        best_threshold_ind = -1
        dice_coeff = -1
        for dind, threshold in enumerate(thresholds):
            dc = np.mean([x[0] for x in dice_coeffs[dind]
                          if x[1] == 'non-empty'])
            # progress.write("Dice @%.2f: %.4f" % (threshold, dc))
            if dc > dice_coeff:
                dice_coeff = dc
                best_threshold_ind = dind
        dice_coeffs = dice_coeffs[best_threshold_ind]
        num_empty = sum(1 for x in dice_coeffs if x[1] == 'empty')
        num_total = len(dice_coeffs)
        num_non_empty = num_total - num_empty
        empty_sum = np.sum([d[0] for d in dice_coeffs if d[1] == 'empty'])
        non_empty_sum = np.sum([d[0] for d in dice_coeffs
                                if d[1] == 'non-empty'])
        dice_coeff_empty = empty_sum / num_empty
        dice_coeff_non_empty = non_empty_sum / num_non_empty
        progress.write(
            '[Empty: %d]: %.3f | %.3f, [Non-Empty: %d]: %.3f | %.3f' % (
                num_empty, dice_coeff_empty, empty_sum / num_total,
                num_non_empty, dice_coeff_non_empty,
                non_empty_sum / num_total))
        dice_coeff = float(dice_coeff)
        summary_str = 'f%02d-ep-%04d-val_dice-%.4f@%.2f' % (
            config.fold, epoch, dice_coeff, thresholds[best_threshold_ind])
        progress.write(summary_str)
        if dice_coeff > best_dice:
            weight_fn = os.path.join(config.logdir, summary_str + '.pth')
            th.save(model.state_dict(), weight_fn)
            best_dice = dice_coeff
            best_fn = weight_fn
            # keep only the most recent checkpoints for this fold
            fns = sorted(glob(os.path.join(config.logdir,
                                           'f%02d-*.pth' % config.fold)))
            for fn in fns[:-config.n_keep]:
                os.remove(fn)

    # create submission
    test_ds = DicomDataset(test_image_fns)
    test_loader = data.DataLoader(test_ds, batch_size=config.batch_size,
                                  shuffle=False, num_workers=0,
                                  pin_memory=False, drop_last=False)
    if best_fn is not None:
        model.load_state_dict(th.load(best_fn))
    model.eval()
    sub = create_submission(model, test_loader, config,
                            pred_zip=config.pred_zip)
    sub.to_csv(config.submission_fn, index=False)
    print("Wrote to: %s" % config.submission_fn)

    # create val submission
    val_fn = config.submission_fn.replace('.csv', '_VAL.csv')
    model.eval()
    sub = create_submission(model, val_loader, config,
                            pred_zip=config.pred_zip.replace('.zip',
                                                             '_VAL.zip'))
    sub.to_csv(val_fn, index=False)
    print("Wrote to: %s" % val_fn)
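
# `score` above returns a (dice, kind) pair, where kind tags the ground-truth
# mask as 'empty' or 'non-empty'. A minimal sketch consistent with how the
# validation loop consumes it; assumed, not this repo's exact implementation.
import numpy as np


def score(y_true, y_pred):
    """Dice coefficient with a special convention for empty masks."""
    if y_true.sum() == 0:
        # for an empty ground truth, an empty prediction scores 1, else 0
        return (1.0 if y_pred.sum() == 0 else 0.0), 'empty'
    intersection = (y_true & y_pred).sum()
    dice = 2.0 * intersection / (y_true.sum() + y_pred.sum())
    return float(dice), 'non-empty'
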
def train(name, loader, checkpoint, num_rep, lr, beta1, gamma_gan,
          num_epochs, wd, device):
    discriminator = Discriminator().to(device)
    generator = Generator(num_rep).to(device)
    losses = {'D': [], 'G': []}
    optimizer_D = AdamW(discriminator.parameters(), lr=lr, weight_decay=wd,
                        betas=(beta1, 0.99))
    optimizer_G = AdamW(generator.parameters(), lr=lr, weight_decay=wd,
                        betas=(beta1, 0.99))
    bce = nn.BCELoss()
    mse = nn.MSELoss()
    normalizer = Normalizer(cfg.mean, cfg.std, device)
    if torch.cuda.device_count() > 1:
        generator = nn.DataParallel(generator)
        discriminator = nn.DataParallel(discriminator)
    save_path = Path('.') / 'save' / name
    if not save_path.is_dir():
        save_path.mkdir(parents=True)
    if checkpoint:
        losses = load_checkpoint(save_path, discriminator, generator,
                                 optimizer_D, optimizer_G)
    # with no checkpoint, losses is empty and training starts at epoch 0
    last_epoch = len(losses['D']) - 1
    logging.info('Last epoch={}'.format(last_epoch))

    for epoch in range(last_epoch + 1, num_epochs):
        losses_G = 0.0
        losses_D = 0.0
        loss_G_gan_acc = 0.0
        loss_G_M_acc = 0.0
        iter_count = 0
        for image, gt, _ in loader:
            batchsize = image.size(0)
            image, gt = image.to(device), gt.to(device)

            # Phase 1: train the discriminator
            discriminator.zero_grad()
            # float fill value keeps BCELoss happy on newer PyTorch
            labels = torch.full((batchsize, 1), 1.0, device=device)
            output = discriminator(gt)
            D_x = output.mean().item()
            loss_D_real = bce(output, labels)
            loss_D_real.backward()
            fake = generator(image)
            fake = normalizer(fake)
            labels.fill_(0)
            output = discriminator(fake.detach())
            D_G_z1 = output.mean().item()
            loss_D_fake = bce(output, labels)
            loss_D_fake.backward()
            loss_D = loss_D_real.item() + loss_D_fake.item()
            optimizer_D.step()

            # Phase 2: train the generator
            generator.zero_grad()
            output = discriminator(fake)
            D_G_z2 = output.mean().item()
            labels.fill_(1)
            loss_G_gan = bce(output, labels)
            loss_G_gan_acc += loss_G_gan.item()
            loss_G_M = mse(fake, gt)
            loss_G_M_acc += loss_G_M.item()
            loss_G = gamma_gan * loss_G_gan + loss_G_M
            loss_G.backward()
            optimizer_G.step()

            losses_D += loss_D
            losses_G += loss_G.item()
            if iter_count % 20 == 0:
                logging.info(
                    "Iteration {} loss -- Loss D {:.4f}, "
                    "Loss G {:.4f}, D(x) {:.4f} D(g(z)) {:.4f} / {:.4f}".
                    format(iter_count, loss_D, loss_G, D_x, D_G_z1, D_G_z2))
            iter_count += 1
        logging.info("D Loss: {:.4f}, G Loss: {:.4f} at epoch {}.".format(
            losses_D, losses_G, epoch))
        logging.info('loss_G_gan_acc={:.4f}, loss_G_M_acc={:.4f}'.format(
            loss_G_gan_acc, loss_G_M_acc))
        losses['D'].append(losses_D)
        losses['G'].append(losses_G)
        if checkpoint:
            save_checkpoint(save_path, discriminator, generator,
                            optimizer_D, optimizer_G, losses)
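
# `Normalizer` re-normalizes generator output with dataset statistics before
# it is shown to the discriminator. A minimal sketch assuming `mean`/`std`
# are per-channel sequences; the real class may differ.
import torch


class Normalizer:
    def __init__(self, mean, std, device):
        self.mean = torch.tensor(mean, device=device).view(1, -1, 1, 1)
        self.std = torch.tensor(std, device=device).view(1, -1, 1, 1)

    def __call__(self, x):
        # x: (N, C, H, W) image batch
        return (x - self.mean) / self.std
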
def objective(SCI_SGD_MOMENTUM, SCI_DROPOUT, SCI_BATCH_SIZE, SCI_L_SECOND,
              SCI_optimizer, LINEARITY):
    global SCI_REGULARIZATION, SCI_EPOCHS, SCI_loss_type, SCI_RELU
    global SCI_BIAS, SCI_BN_MOMENTUM, device, SCI_LR, MaxCredit, count, \
        CreditVector, CreditVec
    SCI_SGD_MOMENTUM = SCI_SGD_MOMENTUM / 10
    DROPOUT = (SCI_DROPOUT / 2).item()
    if SCI_DROPOUT < 0:
        DROPOUT = 0
    BATCH_SIZE = int(SCI_BATCH_SIZE)
    if SCI_L_SECOND < 4:
        SCI_L_SECOND = 4
    if SCI_optimizer < 1:
        SCI_optimizer = 1
    L_SECOND = int(SCI_L_SECOND)
    loss_func = nn.CrossEntropyLoss()

    def create_loss(LOSS):
        # elif chain so 'CrossEntropyLoss' does not fall through to the
        # final else
        if LOSS == 'CrossEntropyLoss':
            loss_func = nn.CrossEntropyLoss()
        elif LOSS == 'NLLLoss':
            loss_func = nn.NLLLoss()
        else:
            loss_func = nn.MultiMarginLoss()
        return loss_func

    REGULARIZATION = float(str(SCI_REGULARIZATION))
    optimizer1 = str(SCI_optimizer)
    from cnn_model import CNN6
    cnn = CNN6(L_FIRST, L_SECOND, KERNEL_X, SCI_BIAS, SCI_BN_MOMENTUM,
               SCI_RELU, DROPOUT, dataset.CLASSES, LINEARITY)
    if GPU_SELECT == 2:
        if torch.cuda.device_count() > 1:
            cnn = nn.DataParallel(cnn, device_ids=[0, 1], dim=0)
        cnn = cnn.cuda()
    if GPU_SELECT == 1:
        cnn.to(device)
    if GPU_SELECT == 0:
        cnn.to(device)
    cnn.apply(CNN6.weights_reset)
    cnn.share_memory()

    train_losses = []  # to track the training loss as the model trains
    output = 0
    loss = 0
    accuracy = 0
    early_stopping.counter = 0
    early_stopping.best_score = None
    early_stopping.early_stop = False
    early_stopping.verbose = False
    TEST_RESULTS = torch.zeros(1, 2)
    loss_type = create_loss(SCI_loss_type)

    from adamw import AdamW
    if optimizer1 == '1':
        optimizer = optim.Adam(cnn.parameters(), lr=SCI_LR,
                               betas=(0.9, 0.99),
                               weight_decay=REGULARIZATION)
    if optimizer1 == '2':
        optimizer = optim.Adam(cnn.parameters(), lr=SCI_LR,
                               betas=(0.9, 0.99),
                               weight_decay=REGULARIZATION, amsgrad=True)
    if optimizer1 == '3':
        optimizer = AdamW(cnn.parameters(), lr=SCI_LR, betas=(0.9, 0.99),
                          weight_decay=REGULARIZATION)
    if optimizer1 == '4':
        optimizer = optim.SGD(cnn.parameters(), lr=SCI_LR,
                              momentum=SCI_SGD_MOMENTUM,
                              weight_decay=REGULARIZATION)
    if optimizer1 == '5':
        optimizer = optim.Adadelta(cnn.parameters(), lr=SCI_LR,
                                   weight_decay=REGULARIZATION)
    if optimizer1 == '6':
        optimizer = optim.Adagrad(cnn.parameters(), lr=SCI_LR,
                                  weight_decay=REGULARIZATION)
    if optimizer1 > '6':
        optimizer = optim.Adam(cnn.parameters(), lr=SCI_LR,
                               betas=(0.9, 0.99),
                               weight_decay=REGULARIZATION)

    from Utillities import Utillities
    Utillities.listing(optimizer, SCI_SGD_MOMENTUM, SCI_BN_MOMENTUM,
                       L_SECOND, SCI_LR, SCI_RELU, SCI_BIAS, SCI_loss_type,
                       REGULARIZATION, BATCH_SIZE, DROPOUT, LINEARITY)

    train_loader = Data.DataLoader(dataset=dataset.train_dataset,
                                   batch_size=BATCH_SIZE, shuffle=False,
                                   num_workers=0, drop_last=True,
                                   pin_memory=True)
    validation_loader = Data.DataLoader(dataset=dataset.validation_dataset,
                                        batch_size=144, shuffle=False,
                                        num_workers=0, drop_last=True,
                                        pin_memory=True)
    test_loader = Data.DataLoader(dataset=dataset.test_dataset,
                                  batch_size=599, shuffle=False,
                                  num_workers=0, pin_memory=True,
                                  drop_last=True)

    for epoch in range(SCI_EPOCHS):
        loss = None
        cnn.train().cuda()
        for step, (train_data, train_target) in enumerate(train_loader):
            train_data = train_data.to(device)
            train_target = train_target.to(device)
            optimizer.zero_grad()  # clear gradients *before* backward
            output, temp = cnn(train_data)  # forward pass
            loss = loss_func(output, train_target)
            train_losses.append(loss.item())  # record training loss
            loss.backward()  # compute gradients of the loss w.r.t. params
            optimizer.step()  # perform a single optimization step
        cnn.eval().cuda()  # switch to evaluation mode
        valid_loss = 0
        accuracy = 0
        with torch.no_grad():
            for step, (validation_data, validation_target) in \
                    enumerate(validation_loader):
                validation_data = validation_data.to(device)
                validation_target = validation_target.to(device)
                output, temp = cnn(validation_data)
                valid_loss += loss_func(output, validation_target).item()
                ps = torch.exp(output)
                equality = (validation_target.data == ps.max(dim=1)[1])
                accuracy += equality.type(torch.FloatTensor).mean()
        train_losses = []
        early_stopping(valid_loss, cnn)
        if early_stopping.early_stop:
            if os.path.exists('checkpoint.pt'):
                print("Loaded the model with the lowest Validation Loss!")
                # map to whatever GPU device number you want
                cnn.load_state_dict(torch.load('checkpoint.pt',
                                               map_location="cuda:1"))
                cnn.to(device)
            break

    cnn.eval()
    class_correct = list(0. for i in range(1000))
    class_total = list(0. for i in range(1000))
    with torch.no_grad():
        for (test_data, test_target) in test_loader:
            test_data = test_data.to(device)
            test_target = test_target.to(device)
            outputs, temp = cnn(test_data)
            _, predicted = torch.max(outputs, 1)
            c = (predicted == test_target).squeeze()
            for i in range(test_target.size(0)):
                label = test_target[i]
                class_correct[label] += c[i].item()
                class_total[label] += 1
    for i in range(dataset.CLASSES):
        TEST_RESULTS[0, i] = class_correct[i] / dataset.TESTED_ELEMENTS[i]
        print('Class: ', i, ' accuracy: ', TEST_RESULTS[0, i])
        print('Class: ', i, ' correct: ', class_correct[i])
    percent = (TEST_RESULTS[0, 0] + TEST_RESULTS[0, 1]) / 2
    print('Final percentage: ', percent)

    # weighted misclassification cost: class-1 errors cost 5x
    CreditCost = (1 - TEST_RESULTS[0, 0]) * dataset.TESTED_ELEMENTS[0] + \
        (1 - TEST_RESULTS[0, 1]) * dataset.TESTED_ELEMENTS[1] * 5
    if TEST_RESULTS[0, 0] == 0 or TEST_RESULTS[0, 1] == 0:
        CreditCost = CreditCost + 300
    print('Last epoch: ', epoch)
    if os.path.exists('checkpoint.pt'):
        os.remove('checkpoint.pt')
    print()
    torch.cuda.empty_cache()
    print()
    # small tie-breaking term on the hyperparameters themselves
    CreditCost = CreditCost + (SCI_SGD_MOMENTUM + SCI_DROPOUT +
                               SCI_BATCH_SIZE + SCI_L_SECOND +
                               SCI_optimizer) / 1000
    print('Credit Cost: ', CreditCost)
    if -CreditCost > MaxCredit:
        MaxCredit = -CreditCost
    print('Best Score So Far: ', MaxCredit)
    CreditVector[count] = MaxCredit
    CreditVec[count] = count

    # plot the running best score
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(CreditVec, -CreditVector, color='tab:orange')
    # print(CreditVec, -CreditVector)
    count = count + 1
    plt.show()  # display the plot
    return CreditCost
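
# `early_stopping` is a module-level EarlyStopping instance whose attributes
# (counter, best_score, early_stop, verbose) are reset above and which saves
# the best weights to 'checkpoint.pt'. A minimal sketch of such a class,
# assumed to match this usage; the patience value is illustrative.
import torch


class EarlyStopping:
    def __init__(self, patience=10, verbose=False):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, val_loss, model):
        score = -val_loss  # higher is better
        if self.best_score is None or score > self.best_score:
            self.best_score = score
            self.counter = 0
            torch.save(model.state_dict(), 'checkpoint.pt')
            if self.verbose:
                print('Validation loss improved; checkpoint saved.')
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
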
def objective(SCI_RELU, SCI_BIAS, SCI_loss_type, SCI_optimizer,
              SCI_BATCH_SIZE, SCI_MM, SCI_REGULARIZATION, SCI_LR,
              SCI_DROPOUT, SCI_L_SECOND, SCI_EPOCHS, SCI_BN_MOMENTUM,
              SCI_SGD_MOMENTUM, SCI_LINEARITY):
    global device, MaxCredit
    global count, CreditVector, CreditVec
    SCI_BATCH_SIZE = int(SCI_BATCH_SIZE)  # integer between 4 and 256
    SCI_MM = round(SCI_MM, 3)  # three decimals in (0.001, 0.999)
    SCI_REGULARIZATION = round(SCI_REGULARIZATION, 3)  # three decimals in (0.001, 0.7)
    SCI_LR = round(SCI_LR, 5)  # five decimals in (1e-4, 7e-1)
    SCI_DROPOUT = round(SCI_DROPOUT, 2)  # two decimals in (0, 0.4)
    SCI_L_SECOND = int(SCI_L_SECOND)  # integer between 2 and 64
    SCI_EPOCHS = int(SCI_EPOCHS)  # integer in (100, 500)
    SCI_BN_MOMENTUM = round(SCI_BN_MOMENTUM, 2)  # two decimals in (0, 0.99)
    SCI_SGD_MOMENTUM = round(SCI_SGD_MOMENTUM, 2)  # two decimals in (0, 0.99)
    SCI_optimizer = int(SCI_optimizer)  # integer between 1 and 4
    SCI_loss_type = int(SCI_loss_type)  # 1-3: CrossEntropyLoss, MultiMarginLoss, NLLLoss
    SCI_LINEARITY = int(SCI_LINEARITY)
    if int(SCI_RELU) == 1:  # 1 -> True, 2 -> False
        SCI_RELU = True
    else:
        SCI_RELU = False
    if int(SCI_BIAS) == 1:  # 1 -> True, 2 -> False
        SCI_BIAS = True
    else:
        SCI_BIAS = False

    from cnn_model import CNN6
    cnn = CNN6(L_FIRST, SCI_L_SECOND, KERNEL_X, SCI_BIAS, SCI_BN_MOMENTUM,
               SCI_RELU, SCI_DROPOUT, dataset.CLASSES, SCI_LINEARITY)
    if GPU_SELECT == 2:
        if torch.cuda.device_count() > 1:
            cnn = nn.DataParallel(cnn, device_ids=[0, 1], dim=0)
        cnn = cnn.cuda()
    if GPU_SELECT == 1:
        cnn.to(device)
    if GPU_SELECT == 0:
        cnn.to(device)
    # next(cnn.parameters()).is_cuda
    # print(cnn)  # net architecture
    # list(cnn.parameters())
    cnn.apply(CNN6.weights_reset)
    cnn.share_memory()

    loss_func = nn.CrossEntropyLoss()

    def create_loss(LOSS):
        # elif chain so LOSS == 1 does not fall through to the final else
        if LOSS == 1:
            loss_func = nn.CrossEntropyLoss()
        elif LOSS == 2:
            loss_func = nn.NLLLoss()
        else:
            loss_func = nn.MultiMarginLoss()
        return loss_func

    MM = float(str(SCI_MM))
    REGULARIZATION = float(str(SCI_REGULARIZATION))
    # optimizer = str(SCI_optimizer)
    LR = float(str(SCI_LR))
    train_losses = []  # to track the training loss as the model trains
    output = 0
    loss = 0
    accuracy = 0
    early_stopping.counter = 0
    early_stopping.best_score = None
    early_stopping.early_stop = False
    early_stopping.verbose = False
    TEST_RESULTS = torch.zeros(1, 2)
    loss_type = create_loss(SCI_loss_type)

    from adamw import AdamW
    if SCI_optimizer == 1:
        optimizer = optim.Adam(cnn.parameters(), lr=LR, betas=(0.9, 0.99),
                               weight_decay=REGULARIZATION)
    if SCI_optimizer == 2:
        optimizer = optim.Adam(cnn.parameters(), lr=LR, betas=(0.9, 0.99),
                               weight_decay=REGULARIZATION, amsgrad=True)
    if SCI_optimizer == 3:
        optimizer = AdamW(cnn.parameters(), lr=LR, betas=(0.9, 0.99),
                          weight_decay=REGULARIZATION)
    if SCI_optimizer == 4:
        optimizer = optim.SGD(cnn.parameters(), lr=LR,
                              momentum=SCI_SGD_MOMENTUM,
                              weight_decay=REGULARIZATION)
    if SCI_optimizer == 5:
        optimizer = optim.Adadelta(cnn.parameters(), lr=LR,
                                   weight_decay=REGULARIZATION)
    if SCI_optimizer == 6:
        optimizer = optim.Adagrad(cnn.parameters(), lr=LR,
                                  weight_decay=REGULARIZATION)

    from Utillities import Utillities
    Utillities.listing(optimizer, SCI_SGD_MOMENTUM, SCI_BN_MOMENTUM,
                       SCI_L_SECOND, SCI_LR, SCI_RELU, SCI_BIAS,
                       SCI_loss_type, REGULARIZATION, SCI_BATCH_SIZE,
                       SCI_DROPOUT, SCI_LINEARITY)

    # Data loaders for easy mini-batch iteration during training
    train_loader = Data.DataLoader(dataset=dataset.train_dataset,
                                   batch_size=SCI_BATCH_SIZE, shuffle=True,
                                   num_workers=0, drop_last=True,
                                   pin_memory=True)
    validation_loader = Data.DataLoader(dataset=dataset.validation_dataset,
                                        batch_size=144, shuffle=True,
                                        num_workers=0, drop_last=True,
                                        pin_memory=True)
    test_loader = Data.DataLoader(dataset=dataset.test_dataset,
                                  batch_size=599, shuffle=True,
                                  num_workers=0, drop_last=True,
                                  pin_memory=True)

    for epoch in range(SCI_EPOCHS):
        loss = None
        cnn.train().cuda()
        for step, (train_data, train_target) in enumerate(train_loader):
            train_data = train_data.to(device)
            train_target = train_target.to(device)
            optimizer.zero_grad()  # clear gradients *before* backward
            output, temp = cnn(train_data)  # forward pass
            loss = loss_func(output, train_target)
            train_losses.append(loss.item())  # record training loss
            loss.backward()  # compute gradients of the loss w.r.t. params
            optimizer.step()  # perform a single optimization step
        cnn.eval().cuda()  # switch to evaluation mode
        valid_loss = 0
        accuracy = 0
        running_loss = 0.0
        with torch.no_grad():
            for step, (validation_data, validation_target) in \
                    enumerate(validation_loader):
                validation_data = validation_data.to(device)
                validation_target = validation_target.to(device)
                output, temp = cnn(validation_data)
                valid_loss += loss_func(output, validation_target).item()
                # ps = torch.exp(output)
                # equality = (validation_target.data == ps.max(dim=1)[1])
                # accuracy += equality.type(torch.FloatTensor).mean()
                running_loss += valid_loss
        # print statistics
        if epoch % 100 == 0:
            print('average loss: %.6f' % running_loss)
            running_loss = 0.0
        train_losses = []
        early_stopping(valid_loss, cnn)
        if early_stopping.early_stop:
            if os.path.exists('checkpoint.pt'):
                print("Loaded the model with the lowest Validation Loss!")
                cnn.load_state_dict(torch.load('checkpoint.pt'))
                cnn.to(device)
            break

    cnn.eval()
    class_correct = list(0. for i in range(1000))
    class_total = list(0. for i in range(1000))
    with torch.no_grad():
        for (test_data, test_target) in test_loader:
            test_data = test_data.to(device)
            test_target = test_target.to(device)
            outputs, temp = cnn(test_data)
            _, predicted = torch.max(outputs, 1)
            c = (predicted == test_target).squeeze()
            for i in range(test_target.size(0)):
                label = test_target[i]
                class_correct[label] += c[i].item()
                class_total[label] += 1
    for i in range(dataset.CLASSES):
        TEST_RESULTS[0, i] = class_correct[i] / dataset.TESTED_ELEMENTS[i]
        print('Class: ', i, ' accuracy: ', TEST_RESULTS[0, i])
        print('Class: ', i, ' correct: ', class_correct[i], ' of ',
              dataset.TESTED_ELEMENTS[i])
    percent = (TEST_RESULTS[0, 0] + TEST_RESULTS[0, 1]) / 2
    print('Final percentage: ', percent)

    # weighted misclassification cost: class-1 errors cost 5x
    CreditCost = int((1 - TEST_RESULTS[0, 0]) * dataset.TESTED_ELEMENTS[0] +
                     (1 - TEST_RESULTS[0, 1]) *
                     dataset.TESTED_ELEMENTS[1] * 5)
    if TEST_RESULTS[0, 0] == 0 or TEST_RESULTS[0, 1] == 0:
        CreditCost = CreditCost + 300
    print('Last epoch: ', epoch)
    print('Credit Cost: ', -CreditCost)
    if os.path.exists('checkpoint.pt'):
        os.remove('checkpoint.pt')
    print()
    print()
    if -CreditCost > MaxCredit:
        MaxCredit = -CreditCost
    print('Best Score So Far: ', MaxCredit)
    CreditVector[count] = MaxCredit
    CreditVec[count] = count

    # plot the running best score
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(CreditVec, -CreditVector, color='tab:blue')
    # print(CreditVec, -CreditVector)
    count = count + 1
    plt.show()  # display the plot
    return -CreditCost
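
# The commented parameter ranges above suggest this objective is driven by a
# black-box hyperparameter optimizer. A minimal sketch wiring it into the
# `bayes_opt` package; the library choice and the SCI_LINEARITY range are
# assumptions, not taken from this repo. Since the function returns
# -CreditCost, higher is better, so we maximize.
from bayes_opt import BayesianOptimization

pbounds = {
    'SCI_RELU': (1, 2), 'SCI_BIAS': (1, 2), 'SCI_loss_type': (1, 3),
    'SCI_optimizer': (1, 4), 'SCI_BATCH_SIZE': (4, 256),
    'SCI_MM': (0.001, 0.999), 'SCI_REGULARIZATION': (0.001, 0.7),
    'SCI_LR': (1e-4, 7e-1), 'SCI_DROPOUT': (0, 0.4),
    'SCI_L_SECOND': (2, 64), 'SCI_EPOCHS': (100, 500),
    'SCI_BN_MOMENTUM': (0, 0.99), 'SCI_SGD_MOMENTUM': (0, 0.99),
    'SCI_LINEARITY': (1, 3),  # range assumed
}
bo = BayesianOptimization(f=objective, pbounds=pbounds, random_state=1)
bo.maximize(init_points=5, n_iter=50)
print(bo.max)
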
# Iterate over the training set
for i, sample in enumerate(progress_bar):
    if cuda:
        sample = move_to_cuda(sample)
    if len(sample) == 0:
        continue
    signals = sample['signals']
    # signals = sample['signals'].unsqueeze(-1)
    output = model(signals)
    loss = cost(output, sample['target'])
    optimizer.zero_grad()
    loss.backward()
    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               clip_norm)
    optimizer.step()

    # Update statistics for progress bar
    total_loss = loss.item()
    stats['loss'] += total_loss
    stats['grad_norm'] += grad_norm
    stats['clip'] += 1 if grad_norm > clip_norm else 0
    progress_bar.set_postfix({key: '{:.4g}'.format(value / (i + 1))
                              for key, value in stats.items()},
                             refresh=True)
print('Epoch {:03d}: {}'.format(
    epoch, ' | '.join(key + ' {:.4g}'.format(value / len(progress_bar))
                      for key, value in stats.items())))

# Validation
model.eval()
dev_loader = torch.utils.data.DataLoader(dev_dataset,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=num_workers,
                                         drop_last=True)
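
# `move_to_cuda` above recursively moves a nested sample onto the GPU. A
# minimal sketch of such a helper, assumed rather than taken from this repo:
import torch


def move_to_cuda(sample):
    if torch.is_tensor(sample):
        return sample.cuda()
    if isinstance(sample, dict):
        return {k: move_to_cuda(v) for k, v in sample.items()}
    if isinstance(sample, (list, tuple)):
        return type(sample)(move_to_cuda(v) for v in sample)
    return sample
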
def train_gan(zq=256, ze=512, batch_size=32, outdir=".", name="tmp",
              dry=False, **kwargs):
    if not dry:
        tensorboard_path = Path(outdir) / 'tensorboard' / name
        model_path = Path(outdir) / 'models' / name
        tensorboard_path.mkdir(exist_ok=True, parents=True)
        model_path.mkdir(exist_ok=True, parents=True)
        sw = SummaryWriter(str(tensorboard_path))

    netT = resnet20().to(device)
    # netT = SimpleConvNet(bias=False).to(device)
    netH = HyperNet(netT, ze, zq).to(device)
    print("Loading pretrained generators...")
    pretrain = torch.load('pretrained.pt')
    netH.load_state_dict(pretrain['netH'])
    netD = SimpleLinearNet(
        [zq * batch_size, zq * batch_size // 2, zq * batch_size // 4,
         1024, 1],
        final_sigmoid=True, batchnorm=False).to(device)
    print(netT, netH, netD)
    print(f"netT params: {param_count(netT)}")
    print(f"netH params: {param_count(netH)}")
    print(f"netD params: {param_count(netD)}")
    generator_count = param_layer_count(netT)

    optimH = AdamW(netH.parameters(), lr=1e-4, betas=(0.5, 0.9),
                   weight_decay=1e-4)
    optimD = AdamW(netD.parameters(), lr=5e-5, betas=(0.5, 0.9),
                   weight_decay=1e-4)
    g_loss_meter, d_loss_meter = AverageMeter(), AverageMeter()
    d_acc_meter = AverageMeter()
    gp_meter = AverageMeter()
    dgrad_meter = AverageMeter()
    adversarial_loss = nn.BCELoss()
    real_label, fake_label = 0, 1
    label = torch.zeros((generator_count, 1), device=device)
    ops = 0
    start_time = time.time()
    minibatch_count = 1562

    for epoch in range(100000):
        d_loss_meter.reset()
        g_loss_meter.reset()
        d_acc_meter.reset()
        gp_meter.reset()
        dgrad_meter.reset()
        # schedH.step()
        # schedD.step()
        for batch_idx in range(minibatch_count):
            n_iter = epoch * minibatch_count + batch_idx
            netH.zero_grad()
            netD.zero_grad()
            z = fast_randn((batch_size, ze), device=device,
                           requires_grad=True)
            q = netH.encoder(z).view(-1, generator_count, zq)

            # Z adversary: train the discriminator on per-layer codes
            free_params([netD])
            freeze_params([netH])
            codes = q.permute((1, 0, 2)).contiguous().view(generator_count,
                                                           -1)
            noise = fast_randn((generator_count, zq * batch_size),
                               device=device, requires_grad=True)
            d_real = netD(noise)
            d_fake = netD(codes)
            d_real_loss = adversarial_loss(d_real, label.fill_(real_label))
            d_real_loss.backward(retain_graph=True)
            d_fake_loss = adversarial_loss(d_fake, label.fill_(fake_label))
            d_fake_loss.backward(retain_graph=True)
            d_loss = d_real_loss + d_fake_loss
            # gp = calc_gradient_penalty(netD, noise, codes, device=device)
            # d_loss = d_fake.mean() - d_real.mean() + 10 * gp
            # d_loss.backward(retain_graph=True)
            dgrad_meter.update(model_grad_norm(netD))
            d_loss_meter.update(d_loss.item())
            d_acc_meter.update((sum(d_real < 0.5) +
                                sum(d_fake > 0.5)).item() /
                               (generator_count * 2))
            # gp_meter.update(gp.item())
            optimD.step()
            # schedD.batch_step()

            # Train the generator to fool the discriminator
            freeze_params([netD])
            free_params([netH])
            # d_fake_loss = -d_fake.mean()
            # d_fake_loss.backward()
            d_fake_loss = adversarial_loss(d_fake, label.fill_(real_label))
            d_fake_loss.backward(retain_graph=True)
            optimH.step()

            with torch.no_grad():
                # update statistics
                if batch_idx % 50 == 0:
                    current_time = time.time()
                    ops_per_sec = ops // (current_time - start_time)
                    start_time = current_time
                    ops = 0
                    print("*" * 70 + " " + name)
                    print("{}/{} D Loss: {}".format(epoch, batch_idx,
                                                    d_loss.item()))
                    print("{} ops/s".format(ops_per_sec))
                ops += batch_size
                if batch_idx > 1 and batch_idx % 199 == 0:
                    if not dry:
                        sw.add_scalar('G/loss', g_loss_meter.avg, n_iter)
                        sw.add_scalar('D/loss', d_loss_meter.avg, n_iter)
                        sw.add_scalar('D/acc', d_acc_meter.avg, n_iter)
                        sw.add_scalar('D/gp', gp_meter.avg, n_iter)
                        sw.add_scalar('D/gradnorm', dgrad_meter.avg, n_iter)
                        netH.eval()
                        netH_samples = [
                            netH(fast_randn((batch_size, ze)).cuda())
                            for _ in range(10)
                        ]
                        netH.train()
                        sw.add_scalar(
                            'G/g_var',
                            sum(x.std(0).mean() for v in netH_samples
                                for x in v[1].values()) /
                            (generator_count * 10), n_iter)
                        sw.add_scalar(
                            'G/q_var',
                            torch.cat([s[0].view(-1, zq)
                                       for s in netH_samples
                                       ]).var(0).mean(), n_iter)
                        if kwargs['embeddings']:
                            sw.add_embedding(
                                q.view(-1, zq), global_step=n_iter, tag="q",
                                metadata=list(range(generator_count)) *
                                batch_size)
                        torch.save(
                            {'netH': netH.state_dict(),
                             'netD': netD.state_dict()},
                            str(model_path / 'pretrain.pt'))
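
# `param_count`, `param_layer_count`, and `fast_randn` are used throughout
# these GAN scripts but not defined here. Minimal sketches consistent with
# that usage; assumptions, not the repo's exact helpers.
import torch


def param_count(net):
    """Total number of trainable parameters."""
    return sum(p.numel() for p in net.parameters() if p.requires_grad)


def param_layer_count(net):
    """Number of parameter tensors (one code is generated per layer)."""
    return len(list(net.parameters()))


def fast_randn(shape, device='cpu', requires_grad=False):
    """Standard-normal noise; a thin wrapper over torch.randn."""
    return torch.randn(*shape, device=device, requires_grad=requires_grad)
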