dictionary = Dictionary.load_from_file(dict_path)
eval_dset = KairosFeatureDataset('infer', dictionary, args.dset)
args.op = ''
args.gamma = 1

n_device = torch.cuda.device_count()
batch_size = args.batch_size * n_device

constructor = 'build_%s' % args.model
model = getattr(base_model, constructor)(eval_dset, args.num_hid, args.op,
                                         args.gamma, args.task).cuda()
model_data = torch.load(args.input + '/model' +
                        ('_epoch%d' % args.epoch if 0 < args.epoch else '') + '.pth')

model = nn.DataParallel(model).cuda()
model.load_state_dict(model_data.get('model_state', model_data))

eval_loader = DataLoader(eval_dset, batch_size, shuffle=False, num_workers=1,
                         collate_fn=utils.trim_collate)
model.train(False)
# pdb.set_trace()
results = infer(model, eval_loader)
np.save(f"data/{args.dset}/results.npy", results)
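# --- Added note (not part of the snippet above) ---------------------------------
# The snippet loads the checkpoint into a model that is already wrapped in
# nn.DataParallel, which only works when the saved keys carry the 'module.'
# prefix. Below is a minimal sketch of a helper that normalizes the prefix
# either way; the name load_state_dict_flexible is hypothetical, not taken
# from the original code.
import torch.nn as nn


def load_state_dict_flexible(model, state_dict):
    """Load `state_dict` into `model`, adding or stripping 'module.' as needed."""
    wrapped = isinstance(model, nn.DataParallel)
    fixed = {}
    for key, value in state_dict.items():
        has_prefix = key.startswith('module.')
        if wrapped and not has_prefix:
            key = 'module.' + key
        elif not wrapped and has_prefix:
            key = key[len('module.'):]
        fixed[key] = value
    model.load_state_dict(fixed)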
def net(model_name: str, num_bands: int, num_channels: int, dontcare_val: int, num_devices: int, train_state_dict_path: str = None, pretrained: bool = True, dropout_prob: float = False, loss_fn: str = None, optimizer: str = None, class_weights: Sequence = None, net_params=None, conc_point: str = None, coordconv_params=None, inference_state_dict: str = None): """Define the neural net""" msg = f'\nNumber of bands specified incompatible with this model. Requires 3 band data.' pretrained = False if train_state_dict_path or inference_state_dict else pretrained dropout = True if dropout_prob else False model = None if model_name == 'unetsmall': model = unet.UNetSmall(num_channels, num_bands, dropout, dropout_prob) elif model_name == 'unet': model = unet.UNet(num_channels, num_bands, dropout, dropout_prob) elif model_name == 'ternausnet': if not num_bands == 3: raise logging.critical(NotImplementedError(msg)) model = TernausNet.ternausnet(num_channels) elif model_name == 'checkpointed_unet': model = checkpointed_unet.UNetSmall(num_channels, num_bands, dropout, dropout_prob) elif model_name == 'inception': model = inception.Inception3(num_channels, num_bands) elif model_name == 'fcn_resnet101': if not num_bands == 3: raise logging.critical(NotImplementedError(msg)) model = models.segmentation.fcn_resnet101(pretrained=False, progress=True, num_classes=num_channels, aux_loss=None) elif model_name == 'deeplabv3_resnet101': if not (num_bands == 3 or num_bands == 4): raise logging.critical(NotImplementedError(msg)) if num_bands == 3: model = models.segmentation.deeplabv3_resnet101( pretrained=pretrained, progress=True) classifier = list(model.classifier.children()) model.classifier = nn.Sequential(*classifier[:-1]) model.classifier.add_module( '4', nn.Conv2d(classifier[-1].in_channels, num_channels, kernel_size=(1, 1))) elif num_bands == 4: model = models.segmentation.deeplabv3_resnet101( pretrained=pretrained, progress=True) if conc_point == 'baseline': logging.info( '\nTesting with 4 bands, concatenating at {}.'.format( conc_point)) conv1 = model.backbone._modules['conv1'].weight.detach().numpy( ) depth = np.expand_dims( conv1[:, 1, ...], axis=1) # reuse green weights for infrared. conv1 = np.append(conv1, depth, axis=1) conv1 = torch.from_numpy(conv1).float() model.backbone._modules['conv1'].weight = nn.Parameter( conv1, requires_grad=True) classifier = list(model.classifier.children()) model.classifier = nn.Sequential(*classifier[:-1]) model.classifier.add_module( '4', nn.Conv2d(classifier[-1].in_channels, num_channels, kernel_size=(1, 1))) else: classifier = list(model.classifier.children()) model.classifier = nn.Sequential(*classifier[:-1]) model.classifier.add_module( '4', nn.Conv2d(classifier[-1].in_channels, num_channels, kernel_size=(1, 1))) conc_point = 'conv1' if not conc_point else conc_point model = LayersEnsemble(model, conc_point=conc_point) logging.info( f'\nFinetuning pretrained deeplabv3 with {num_bands} input channels (imagery bands). ' f'Concatenation point: "{conc_point}"') elif model_name in lm_smp.keys(): lsmp = lm_smp[model_name] # TODO: add possibility of our own weights lsmp['params'][ 'encoder_weights'] = "imagenet" if 'pretrained' in model_name.split( "_") else None lsmp['params']['in_channels'] = num_bands lsmp['params']['classes'] = num_channels lsmp['params']['activation'] = None model = lsmp['fct'](**lsmp['params']) else: raise logging.critical( ValueError( f'\nThe model name {model_name} in the config.yaml is not defined.' 
)) coordconv_convert = get_key_def('coordconv_convert', coordconv_params, False) if coordconv_convert: centered = get_key_def('coordconv_centered', coordconv_params, True) normalized = get_key_def('coordconv_normalized', coordconv_params, True) noise = get_key_def('coordconv_noise', coordconv_params, None) radius_channel = get_key_def('coordconv_radius_channel', coordconv_params, False) scale = get_key_def('coordconv_scale', coordconv_params, 1.0) # note: this operation will not attempt to preserve already-loaded model parameters! model = coordconv.swap_coordconv_layers(model, centered=centered, normalized=normalized, noise=noise, radius_channel=radius_channel, scale=scale) if inference_state_dict: state_dict_path = inference_state_dict checkpoint = load_checkpoint(state_dict_path) return model, checkpoint, model_name else: if train_state_dict_path is not None: checkpoint = load_checkpoint(train_state_dict_path) else: checkpoint = None # list of GPU devices that are available and unused. If no GPUs, returns empty list gpu_devices_dict = get_device_ids(num_devices) num_devices = len(gpu_devices_dict.keys()) logging.info( f"Number of cuda devices requested: {num_devices}. " f"Cuda devices available: {list(gpu_devices_dict.keys())}\n") if num_devices == 1: logging.info( f"\nUsing Cuda device 'cuda:{list(gpu_devices_dict.keys())[0]}'" ) elif num_devices > 1: logging.info( f"\nUsing data parallel on devices: {list(gpu_devices_dict.keys())[1:]}. " f"Main device: 'cuda:{list(gpu_devices_dict.keys())[0]}'") try: # For HPC when device 0 not available. Error: Invalid device id (in torch/cuda/__init__.py). # DataParallel adds prefix 'module.' to state_dict keys model = nn.DataParallel(model, device_ids=list( gpu_devices_dict.keys())) except AssertionError: logging.warning( f"\nUnable to use devices with ids {gpu_devices_dict.keys()}" f"Trying devices with ids {list(range(len(gpu_devices_dict.keys())))}" ) model = nn.DataParallel( model, device_ids=list(range(len(gpu_devices_dict.keys())))) else: logging.warning( f"No Cuda device available. This process will only run on CPU\n" ) logging.info( f'\nSetting model, criterion, optimizer and learning rate scheduler...' ) device = torch.device( f'cuda:{list(range(len(gpu_devices_dict.keys())))[0]}' if gpu_devices_dict else 'cpu') try: # For HPC when device 0 not available. Error: Cuda invalid device ordinal. model.to(device) except AssertionError: logging.exception(f"Unable to use device. Trying device 0...\n") device = torch.device(f'cuda' if gpu_devices_dict else 'cpu') model.to(device) model, criterion, optimizer, lr_scheduler = set_hyperparameters( params=net_params, num_classes=num_channels, model=model, checkpoint=checkpoint, dontcare_val=dontcare_val, loss_fn=loss_fn, optimizer=optimizer, class_weights=class_weights, inference=inference_state_dict) criterion = criterion.to(device) return model, model_name, criterion, optimizer, lr_scheduler, device, gpu_devices_dict
start_epoch = 0
CHECKPOINT_PATH = FLAGS.checkpoint_path
'''
if CHECKPOINT_PATH is not None and os.path.isfile(CHECKPOINT_PATH):
    checkpoint = torch.load(CHECKPOINT_PATH)
    net.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch']
    log_string("-> loaded checkpoint %s (epoch: %d)" % (CHECKPOINT_PATH, start_epoch))
'''

if torch.cuda.device_count() > 1:
    log_string("Let's use %d GPUs!" % (torch.cuda.device_count()))
    # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    net = nn.DataParallel(net)

################################################# training functions ###########################################
def adjust_learning_rate(optimizer, epoch):
    lr = optimizer.param_groups[0]['lr']
    lr = lr * cfg.lr_decays[epoch]
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


def train_one_epoch():
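# --- Added note (not part of the snippet above) ---------------------------------
# The commented-out resume block expects a checkpoint dict with the keys
# 'model_state_dict', 'optimizer_state_dict' and 'epoch', saved *before* the
# model is wrapped in nn.DataParallel (i.e. without the 'module.' prefix).
# A matching save routine could look like the sketch below; the function name
# save_checkpoint is an assumption, not taken from the original code.
import torch
import torch.nn as nn


def save_checkpoint(net, optimizer, epoch, path):
    # Unwrap DataParallel so the saved keys carry no 'module.' prefix.
    raw_net = net.module if isinstance(net, nn.DataParallel) else net
    torch.save({
        'epoch': epoch,
        'model_state_dict': raw_net.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, path)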
def main(): ngpu = 2 batchSize = 1 inputSize = 256 channel = 3 # load nets Uroot = "checkpoints/Umodel_50.pth" Droot = "checkpoints/dModel_50.pth" uModel = UNET(inputSize, inputSize, channel) dModel = DNET(batch=batchSize, nc=channel, inputSize=inputSize, nf=32) if ngpu: uModel = uModel.cuda() uModel = nn.DataParallel(uModel) if ngpu: dModel = dModel.cuda() uModel.load_state_dict(torch.load(Uroot)) dModel.load_state_dict(torch.load(Droot)) uModel.eval() dModel.eval() # load data # dataroot = "/home/cad/PycharmProjects/ContextEncoder/dataset/conference/val" dataroot = "/home/cad/PycharmProjects/ContextEncoder/dataset/disparityTest" # dataroot = "/home/cad/PycharmProjects/ContextEncoder/dataset/conference/test" dataset = dset.ImageFolder(root=dataroot, transform=transforms.Compose([ transforms.Scale(inputSize), transforms.CenterCrop(inputSize), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), ])) dataloader = torch.utils.data.DataLoader(dataset, batch_size=batchSize, shuffle=True, num_workers=3, drop_last=True) # load mask mask = getMask(inputSize, batchSize, channel) # load loss function lossF = nn.MSELoss() lossF = nn.L1Loss() lossBCE = nn.BCELoss() fake_label = Variable(torch.FloatTensor(batchSize).fill_(0)) real_label = Variable(torch.FloatTensor(batchSize).fill_(1)) if ngpu: fake_label = fake_label.cuda() real_label = real_label.cuda() #running lossG_list = [] loss_img_list = [] loss_mask_list = [] loss_ad_list = [] img_real = None img_fake = None beginT = time.localtime() mask_gpu = Variable(1 - mask).cuda() for i, data in enumerate(dataloader, 0): img_data, _ = data img_target = img_data.clone() img_target = Variable(img_target) img_data = initData(img_data, mask) img_data = Variable(img_data) if ngpu: img_data = img_data.cuda() img_target = img_target.cuda() # train Unet(G) output = uModel(img_data) loss_img = lossF(output, img_target) loss_mask = lossF(output * mask_gpu, img_target * mask_gpu) Doutput = dModel(output) loss_ad = lossBCE(Doutput, real_label) lossG = 0.1 * loss_img + 0.8 * loss_mask + 0.1 * loss_ad print(('TEST: [%d / %d] ' 'LossG: %.4f; LossIMG: %.4f, LossMASK: %.4f, LossAD: %.4f;') % ( i, len(dataloader), lossG.data.mean(), loss_img.data.mean(), loss_mask.data.mean(), loss_ad.data.mean(), )) # record if 1: for x in lossG: lossG_list.append(x.data.mean()) for x in loss_img: loss_img_list.append(x.data.mean()) for x in loss_mask: loss_mask_list.append(x.data.mean()) for x in loss_ad: loss_ad_list.append(x.data.mean()) img_real = img_target.clone() img_fake = output.clone() if i < 10: nrow = int(np.sqrt(batchSize)) vutils.save_image(img_real.data, 'output/unet_test%d_real.png' % (i), nrow=nrow, normalize=True) vutils.save_image(img_fake.data, 'output/unet_test%d_fake.png' % (i), nrow=nrow, normalize=True) if 1: fig, (ax0, ax1) = plt.subplots(ncols=2, figsize=(20, 10)) ax0.plot(lossG_list, label="$lossG_total$") ax0.legend() ax1.plot(loss_img_list, label="loss_img") ax1.plot(loss_mask_list, label="loss_mask") ax1.plot(loss_ad_list, label="loss_ad") ax1.legend() plt.savefig("loss_TEST.png", dpi=200) plt.close(fig) endT = time.localtime() print('begin: %d:%d:%d' % (beginT.tm_hour, beginT.tm_min, beginT.tm_sec)) print('end: %d:%d:%d' % (endT.tm_hour, endT.tm_min, endT.tm_sec)) pass
def main(): parser = argparse.ArgumentParser() parser.add_argument("-m", "--model", type=str, default="unet", help="") parser.add_argument("-dd", "--data-dir", type=str, default=None, required=True, help="Data dir") parser.add_argument( "-c", "--checkpoint", type=str, default=None, required=True, help="Checkpoint filename to use as initial model weights", ) parser.add_argument("-b", "--batch-size", type=int, default=16, help="Batch size for inference") parser.add_argument("-tta", "--tta", default=None, type=str, help="Type of TTA to use [fliplr, d4]") args = parser.parse_args() data_dir = args.data_dir checkpoint_file = auto_file(args.checkpoint) run_dir = os.path.dirname(os.path.dirname(checkpoint_file)) out_dir = os.path.join(run_dir, "submit") os.makedirs(out_dir, exist_ok=True) checkpoint = load_checkpoint(checkpoint_file) checkpoint_epoch = checkpoint["epoch"] print("Loaded model weights from", args.checkpoint) print("Epoch :", checkpoint_epoch) print( "Metrics (Train):", "IoU:", checkpoint["epoch_metrics"]["train"]["jaccard"], "Acc:", checkpoint["epoch_metrics"]["train"]["accuracy"], ) print( "Metrics (Valid):", "IoU:", checkpoint["epoch_metrics"]["valid"]["jaccard"], "Acc:", checkpoint["epoch_metrics"]["valid"]["accuracy"], ) model = get_model(args.model) unpack_checkpoint(checkpoint, model=model) threshold = checkpoint["epoch_metrics"]["valid"].get("optimized_jaccard/threshold", 0.5) print("Using threshold", threshold) model = nn.Sequential(PickModelOutput(model, OUTPUT_MASK_KEY), nn.Sigmoid()) if args.tta == "fliplr": model = TTAWrapper(model, fliplr_image2mask) if args.tta == "flipscale": model = TTAWrapper(model, fliplr_image2mask) model = MultiscaleTTAWrapper(model, size_offsets=[-128, -64, 64, 128]) if args.tta == "d4": model = TTAWrapper(model, d4_image2mask) model = model.cuda() if torch.cuda.device_count() > 1: model = nn.DataParallel(model) model = model.eval() mask = predict(model, read_inria_image("sample_color.jpg"), image_size=(512, 512), batch_size=args.batch_size) mask = ((mask > threshold) * 255).astype(np.uint8) name = os.path.join(run_dir, "sample_color.jpg") cv2.imwrite(name, mask) test_predictions_dir = os.path.join(out_dir, "test_predictions") test_predictions_dir_compressed = os.path.join(out_dir, "test_predictions_compressed") if args.tta is not None: test_predictions_dir += f"_{args.tta}" test_predictions_dir_compressed += f"_{args.tta}" os.makedirs(test_predictions_dir, exist_ok=True) os.makedirs(test_predictions_dir_compressed, exist_ok=True) test_images = find_in_dir(os.path.join(data_dir, "test", "images")) for fname in tqdm(test_images, total=len(test_images)): image = read_inria_image(fname) mask = predict(model, image, image_size=(512, 512), batch_size=args.batch_size) mask = ((mask > threshold) * 255).astype(np.uint8) name = os.path.join(test_predictions_dir, os.path.basename(fname)) cv2.imwrite(name, mask) name_compressed = os.path.join(test_predictions_dir_compressed, os.path.basename(fname)) command = ( "gdal_translate --config GDAL_PAM_ENABLED NO -co COMPRESS=CCITTFAX4 -co NBITS=1 " + name + " " + name_compressed ) subprocess.call(command, shell=True)
def main(args): # Set up logging args.save_dir = util.get_save_dir(args.save_dir, args.name, training=False) log = util.get_logger(args.save_dir, args.name) log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True))) device, gpu_ids = util.get_available_devices() args.batch_size *= max(1, len(gpu_ids)) # Get embeddings log.info('Loading embeddings...') word_vectors = util.torch_from_json(args.word_emb_file) char_vectors = util.torch_from_json(args.char_emb_file) # NEW : load the tag embeddings pos_vectors = util.torch_from_json(args.pos_emb_file) ner_vectors = util.torch_from_json(args.ner_emb_file) # Choose model log.info('Building model {}...'.format(args.name)) if 'baseline' in args.name: model = BiDAF(word_vectors=word_vectors, hidden_size=args.hidden_size) elif args.name == 'BiDAF_char': model = BiDAF_char(word_vectors=word_vectors, char_vectors=char_vectors, hidden_size=args.hidden_size) # NEW elif (args.name == 'BiDAF_tag') or (args.name == 'BiDAF_tag_unfrozen') or ( args.name == 'BiDAF_tag_loss') or (args.name == 'BiDAF_tag_unfrozen_loss'): model = BiDAF_tag(word_vectors=word_vectors, char_vectors=char_vectors, pos_vectors=pos_vectors, ner_vectors=ner_vectors, hidden_size=args.hidden_size) elif (args.name == 'BiDAF_tag_ext') or (args.name == 'BiDAF_tag_ext_unfrozen'): model = BiDAF_tag_ext(word_vectors=word_vectors, char_vectors=char_vectors, pos_vectors=pos_vectors, ner_vectors=ner_vectors, hidden_size=args.hidden_size) elif args.name == 'coattn': model = CoattentionModel(hidden_dim=args.hidden_size, embedding_matrix=word_vectors, train_word_embeddings=False, dropout=0.35, pooling_size=16, number_of_iters=4, number_of_layers=2, device=device) else: raise NameError('No model named ' + args.name) model = nn.DataParallel(model, gpu_ids) log.info('Loading checkpoint from {}...'.format(args.load_path)) model = util.load_model(model, args.load_path, gpu_ids, return_step=False) model = model.to(device) model.eval() # Get data loader log.info('Building dataset...') record_file = vars(args)['{}_record_file'.format(args.split)] dataset = SQuAD(record_file, args.use_squad_v2) data_loader = data.DataLoader(dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, collate_fn=collate_fn) # Evaluate log.info('Evaluating on {} split...'.format(args.split)) nll_meter = util.AverageMeter() pred_dict = {} # Predictions for TensorBoard sub_dict = {} # Predictions for submission eval_file = vars(args)['{}_eval_file'.format(args.split)] with open(eval_file, 'r') as fh: gold_dict = json_load(fh) with torch.no_grad(), \ tqdm(total=len(dataset)) as progress_bar: for cw_idxs, cc_idxs, cpos_idxs, cner_idxs, cw_ems, cw_tfs, qw_idxs, qc_idxs, qpos_idxs, qner_idxs, qw_ems, qw_tfs, y1, y2, ids in data_loader: # NEW # Setup for forward cw_idxs = cw_idxs.to(device) qw_idxs = qw_idxs.to(device) batch_size = cw_idxs.size(0) # Forward if 'baseline' in args.name: log_p1, log_p2 = model(cw_idxs, qw_idxs) elif args.name == 'BiDAF_char': # Additional setup for forward cc_idxs = cc_idxs.to(device) qc_idxs = qc_idxs.to(device) log_p1, log_p2 = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs) elif args.name == 'coattn': max_c_len = cw_idxs.size(1) max_q_len = qw_idxs.size(1) c_len = [] q_len = [] for i in range(cw_idxs.size(0)): if len((cw_idxs[i] == 0).nonzero()) != 0: c_len_i = (cw_idxs[i] == 0).nonzero()[0].item() else: c_len_i = cw_idxs.size(1) if len((qw_idxs[i] == 0).nonzero()) != 0: q_len_i = (qw_idxs[i] == 0).nonzero()[0].item() else: q_len_i = qw_idxs.size(1) 
c_len.append(int(c_len_i)) q_len.append(int(q_len_i)) c_len = torch.Tensor(c_len).int() q_len = torch.Tensor(q_len).int() num_examples = int(cw_idxs.size(0) / len(gpu_ids)) log_p1, log_p2 = model(max_c_len, max_q_len, cw_idxs, qw_idxs, c_len, q_len, num_examples, True, False) # NEW elif (args.name == 'BiDAF_tag') or (args.name == 'BiDAF_tag_unfrozen') or ( args.name == 'BiDAF_tag_loss') or (args.name == 'BiDAF_tag_unfrozen_loss'): # Additional setup for forward cc_idxs = cc_idxs.to(device) cpos_idxs = cpos_idxs.to(device) cner_idxs = cner_idxs.to(device) qc_idxs = qc_idxs.to(device) qpos_idxs = qpos_idxs.to(device) qner_idxs = qner_idxs.to(device) log_p1, log_p2 = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs, cpos_idxs, qpos_idxs, cner_idxs, qner_idxs) elif (args.name == 'BiDAF_tag_ext') or (args.name == 'BiDAF_tag_ext_unfrozen'): # Additional setup for forward cc_idxs = cc_idxs.to(device) cpos_idxs = cpos_idxs.to(device) cner_idxs = cner_idxs.to(device) cw_ems = cw_ems.to(device) cw_tfs = cw_tfs.to(device) qc_idxs = qc_idxs.to(device) qpos_idxs = qpos_idxs.to(device) qner_idxs = qner_idxs.to(device) qw_ems = qw_ems.to(device) qw_tfs = qw_tfs.to(device) log_p1, log_p2 = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs, cpos_idxs, qpos_idxs, cner_idxs, qner_idxs, cw_ems, qw_ems, cw_tfs, qw_tfs) else: raise NameError('No model named ' + args.name) y1, y2 = y1.to(device), y2.to(device) loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2) nll_meter.update(loss.item(), batch_size) # Get F1 and EM scores p1, p2 = log_p1.exp(), log_p2.exp() starts, ends = util.discretize(p1, p2, args.max_ans_len, args.use_squad_v2) # Log info progress_bar.update(batch_size) if args.split != 'test': # No labels for the test set, so NLL would be invalid progress_bar.set_postfix(NLL=nll_meter.avg) idx2pred, uuid2pred = util.convert_tokens(gold_dict, ids.tolist(), starts.tolist(), ends.tolist(), args.use_squad_v2) pred_dict.update(idx2pred) sub_dict.update(uuid2pred) # Log results (except for test set, since it does not come with labels) if args.split != 'test': results = util.eval_dicts(gold_dict, pred_dict, args.use_squad_v2) results_list = [('NLL', nll_meter.avg), ('F1', results['F1']), ('EM', results['EM'])] if args.use_squad_v2: results_list.append(('AvNA', results['AvNA'])) results = OrderedDict(results_list) # Log to console results_str = ', '.join('{}: {:05.2f}'.format(k, v) for k, v in results.items()) log.info('{} {}'.format(args.split.title(), results_str)) # More detailed error analysis results = util.analysis_dicts(gold_dict, pred_dict, args.use_squad_v2) results_str = ', '.join('{}: {:05.2f}'.format(k, v) for k, v in results.items()) log.info('{} {}'.format(args.split.title(), results_str)) # Log to TensorBoard tbx = SummaryWriter(args.save_dir) util.visualize(tbx, pred_dict=pred_dict, eval_path=eval_file, step=0, split=args.split, num_visuals=args.num_visuals) # Write submission file sub_path = join(args.save_dir, args.split + '_' + args.sub_file) log.info('Writing submission file to {}...'.format(sub_path)) with open(sub_path, 'w', newline='', encoding='utf-8') as csv_fh: csv_writer = csv.writer(csv_fh, delimiter=',') csv_writer.writerow(['Id', 'Predicted']) for uuid in sorted(sub_dict): csv_writer.writerow([uuid, sub_dict[uuid]])
    output_dir, 'fpn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))
_print("loading checkpoint %s" % (load_name), logging)
checkpoint = torch.load(load_name)
args.session = checkpoint['session']
args.start_epoch = checkpoint['epoch']
FPN.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
lr = optimizer.param_groups[0]['lr']
if 'pooling_mode' in checkpoint.keys():
    cfg.POOLING_MODE = checkpoint['pooling_mode']
_print("loaded checkpoint %s" % (load_name), logging)

if args.mGPUs:
    FPN = nn.DataParallel(FPN)

if args.cuda:
    FPN.cuda()

iters_per_epoch = int(train_size / args.batch_size)

for epoch in range(args.start_epoch, args.max_epochs):
    # setting to train mode
    FPN.train()
    loss_temp = 0
    start = time.time()

    if epoch % (args.lr_decay_step + 1) == 0:
        adjust_learning_rate(optimizer, args.lr_decay_gamma)
        lr *= args.lr_decay_gamma
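# --- Added note (not part of the snippet above) ---------------------------------
# adjust_learning_rate is called here with a decay factor (args.lr_decay_gamma),
# but its definition lies outside this excerpt. A common implementation, given
# only as a hedged sketch, simply rescales every parameter group:
def adjust_learning_rate(optimizer, decay=0.1):
    # Multiply the learning rate of every parameter group by `decay`.
    for param_group in optimizer.param_groups:
        param_group['lr'] = decay * param_group['lr']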
def load_checkpoint(path):
    """Load a PyTorch model checkpoint

    Params
    --------
        path (str): saved model checkpoint. Must start with `model_name-` and end in '.pth'

    Returns
    --------
        model, optimizer: the restored model (wrapped in DataParallel / moved to GPU
        if available) and its optimizer rebuilt from the checkpoint
    """

    # Get the model name
    model_name = path.split('-')[0]
    assert (model_name in ['vgg16', 'resnet50']), "Path must have the correct model name"

    # Load in checkpoint
    checkpoint = torch.load(path)

    if model_name == 'vgg16':
        model = models.vgg16(pretrained=True)
        # Make sure to set parameters as not trainable
        for param in model.parameters():
            param.requires_grad = False
        model.classifier = checkpoint['classifier']

    elif model_name == 'resnet50':
        model = models.resnet50(pretrained=True)
        # Make sure to set parameters as not trainable
        for param in model.parameters():
            param.requires_grad = False
        model.fc = checkpoint['fc']

    # Load in the state dict
    model.load_state_dict(checkpoint['state_dict'])

    total_params = sum(p.numel() for p in model.parameters())
    print(f'{total_params:,} total parameters.')
    total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f'{total_trainable_params:,} total gradient parameters.')

    # Move to gpu
    if multi_gpu:
        model = nn.DataParallel(model)

    if train_on_gpu:
        model = model.to('cuda')

    # Model basics
    model.class_to_idx = checkpoint['class_to_idx']
    model.idx_to_class = checkpoint['idx_to_class']
    model.epochs = checkpoint['epochs']

    # Optimizer
    optimizer = checkpoint['optimizer']
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    return model, optimizer
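# --- Added note (not part of the snippet above) ---------------------------------
# Hedged usage example: the filename is made up; it only needs to follow the
# "<model_name>-...pth" convention that the assert above checks for.
model, optimizer = load_checkpoint(path='vgg16-transfer.pth')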
def train_cnndm(): print("Start training hierarchical RNN model") # ---------------------------------------------------------------------------------- # args = {} args['use_gpu'] = True args['num_utterances'] = 50 # max no. utterance in a meeting args['num_words'] = 32 # max no. words in an utterance args['summary_length'] = 144 # max no. words in a summary args['summary_type'] = 'long' # long or short summary args['vocab_size'] = 30522 # BERT tokenizer args['embedding_dim'] = 256 # word embeeding dimension args['rnn_hidden_size'] = 512 # RNN hidden size args['dropout'] = 0.1 args['num_layers_enc'] = 2 # in total it's num_layers_enc*2 (word/utt) args['num_layers_dec'] = 1 args['batch_size'] = 32 args['update_nbatches'] = 1 args['num_epochs'] = 20 args['random_seed'] = 78 args['best_val_loss'] = 1e+10 args['val_batch_size'] = 32 # 1 for now --- evaluate ROUGE args['val_stop_training'] = 10 args['adjust_lr'] = True # if True overwrite the learning rate above args['initial_lr'] = 5e-3 # lr = lr_0*step^(-decay_rate) args['decay_rate'] = 0.25 args['label_smoothing'] = 0.1 args['a_div'] = 1.0 args['memory_utt'] = False args['model_save_dir'] = "lib/trained_models/" # args['load_model'] = "lib/trained_models/MODEL_CNNDM0.pt" args['load_model'] = None args['model_name'] = 'MODEL_CNNDM1' # ---------------------------------------------------------------------------------- # print_config(args) if args['use_gpu']: if 'X_SGE_CUDA_DEVICE' in os.environ: # to run on CUED stack machine print('running on the stack... 1 GPU') cuda_device = os.environ['X_SGE_CUDA_DEVICE'] print('X_SGE_CUDA_DEVICE is set to {}'.format(cuda_device)) os.environ['CUDA_VISIBLE_DEVICES'] = cuda_device else: print('running locally...') os.environ[ "CUDA_VISIBLE_DEVICES"] = '1' # choose the device (GPU) here device = 'cuda' else: device = 'cpu' print("device = {}".format(device)) # random seed random.seed(args['random_seed']) torch.manual_seed(args['random_seed']) np.random.seed(args['random_seed']) args[ 'model_data_dir'] = "/home/alta/summary/pm574/summariser0/lib/model_data/" args['max_pos_embed'] = 512 args['max_num_sentences'] = 32 args['max_summary_length'] = args['summary_length'] train_data = load_cnndm_data(args, 'trainx', dump=False) # train_data = load_cnndm_data(args, 'test', dump=False) # print("loaded TEST data") valid_data = load_cnndm_data(args, 'valid', dump=False) model = EncoderDecoder(args, device=device) print(model) # Load model if specified (path to pytorch .pt) if args['load_model'] != None: model_path = args['load_model'] try: model.load_state_dict(torch.load(model_path)) except RuntimeError: # need to remove module # Main model model_state_dict = torch.load(model_path) new_model_state_dict = OrderedDict() for key in model_state_dict.keys(): new_model_state_dict[key.replace("module.", "")] = model_state_dict[key] model.load_state_dict(new_model_state_dict) model.train() print("Loaded model from {}".format(args['load_model'])) else: print("Train a new model") # to use multiple GPUs if torch.cuda.device_count() > 1: print("Multiple GPUs: {}".format(torch.cuda.device_count())) model = nn.DataParallel(model) print("Train a new model") # Hyperparameters BATCH_SIZE = args['batch_size'] NUM_EPOCHS = args['num_epochs'] VAL_BATCH_SIZE = args['val_batch_size'] VAL_STOP_TRAINING = args['val_stop_training'] if args['label_smoothing'] > 0.0: criterion = LabelSmoothingLoss(num_classes=args['vocab_size'], smoothing=args['label_smoothing'], reduction='none') else: criterion = nn.NLLLoss(reduction='none') # we use two 
separate optimisers (encoder & decoder) optimizer = optim.Adam(model.parameters(), lr=0.77, betas=(0.9, 0.999), eps=1e-08, weight_decay=0) optimizer.zero_grad() # validation losses best_val_loss = args['best_val_loss'] best_epoch = 0 stop_counter = 0 training_step = 0 for epoch in range(NUM_EPOCHS): print( "======================= Training epoch {} =======================" .format(epoch)) num_train_data = len(train_data) # num_batches = int(num_train_data/BATCH_SIZE) + 1 num_batches = int(num_train_data / BATCH_SIZE) print("num_batches = {}".format(num_batches)) print("shuffle train data") random.shuffle(train_data) idx = 0 for bn in range(num_batches): input, u_len, w_len, target, tgt_len = get_a_batch( train_data, idx, BATCH_SIZE, args['num_utterances'], args['num_words'], args['summary_length'], args['summary_type'], device) # decoder target decoder_target, decoder_mask = shift_decoder_target( target, tgt_len, device, mask_offset=True) decoder_target = decoder_target.view(-1) decoder_mask = decoder_mask.view(-1) try: # decoder_output = model(input, u_len, w_len, target) decoder_output, _, attn_scores, _, u_attn_scores = model( input, u_len, w_len, target) except IndexError: print( "there is an IndexError --- likely from if segment_indices[bn][-1] == u_len[bn]-1:" ) print("for now just skip this batch!") idx += BATCH_SIZE # previously I forget to add this line!!! continue loss = criterion(decoder_output.view(-1, args['vocab_size']), decoder_target) loss = (loss * decoder_mask).sum() / decoder_mask.sum() # loss.backward() # Diversity Loss: if BATCH_SIZE == 1: intra_div, inter_div = diverisity_loss(u_attn_scores, decoder_target, u_len, tgt_len) if inter_div == 0: loss_div = 0 else: loss_div = intra_div / inter_div else: dec_target_i = 0 loss_div = 0 for bi in range(BATCH_SIZE): one_u_attn_scores = u_attn_scores[bi:bi + 1, :, :] one_decoder_target = decoder_target[ dec_target_i:dec_target_i + args['summary_length']] one_u_len = u_len[bi:bi + 1] one_tgt_len = tgt_len[bi:bi + 1] intra_div, inter_div = diverisity_loss( one_u_attn_scores, one_decoder_target, one_u_len, one_tgt_len) if inter_div == 0: loss_div += 0 else: loss_div += intra_div / inter_div dec_target_i += args['summary_length'] loss_div /= BATCH_SIZE total_loss = loss + args['a_div'] * loss_div total_loss.backward() idx += BATCH_SIZE if bn % args['update_nbatches'] == 0: # gradient_clipping max_norm = 0.5 nn.utils.clip_grad_norm_(model.parameters(), max_norm) # update the gradients if args['adjust_lr']: adjust_lr(optimizer, args['initial_lr'], args['decay_rate'], training_step) optimizer.step() optimizer.zero_grad() training_step += args['batch_size'] * args['update_nbatches'] if bn % 2 == 0: print("[{}] batch {}/{}: loss = {:.5f} | loss_div = {:.5f}". 
format(str(datetime.now()), bn, num_batches, loss, loss_div)) sys.stdout.flush() if bn % 100 == 0: print( "======================== GENERATED SUMMARY ========================" ) print( bert_tokenizer.decode( torch.argmax(decoder_output[0], dim=-1).cpu().numpy()[:tgt_len[0]])) print( "======================== REFERENCE SUMMARY ========================" ) print( bert_tokenizer.decode( decoder_target.view(BATCH_SIZE, args['summary_length']) [0, :tgt_len[0]].cpu().numpy())) if bn % 1000 == 0 and epoch > 0: # ---------------- Evaluate the model on validation data ---------------- # print("Evaluating the model at epoch {} step {}".format( epoch, bn)) print("learning_rate = {}".format( optimizer.param_groups[0]['lr'])) # switch to evaluation mode model.eval() with torch.no_grad(): avg_val_loss = evaluate(model, valid_data, VAL_BATCH_SIZE, args, device) print("avg_val_loss_per_token = {}".format(avg_val_loss)) # switch to training mode model.train() # ------------------- Save the model OR Stop training ------------------- # if avg_val_loss < best_val_loss: stop_counter = 0 best_val_loss = avg_val_loss best_epoch = epoch state = { 'epoch': epoch, 'bn': bn, 'training_step': training_step, 'model': model.state_dict(), 'optimizer': optimizer.state_dict(), 'best_val_loss': best_val_loss } savepath = args[ 'model_save_dir'] + "model-{}-ep{}.pt".format( args['model_name'], epoch) # torch.save(model.state_dict(), savepath) torch.save(state, savepath) print("Model improved & saved at {}".format(savepath)) else: print("Model not improved #{}".format(stop_counter)) if stop_counter < VAL_STOP_TRAINING: print( "Just continue training ---- no loading old weights" ) stop_counter += 1 else: print( "Model has not improved for {} times! Stop training." .format(VAL_STOP_TRAINING)) return print("End of training hierarchical RNN model")
def main(): """Do stuff.""" args = parser.parse_args() # don't use this, neither set learning rate as a linear function # of the count of gpus, it will make accuracy lower # args.batch_size = args.batch_size * torch.cuda.device_count() if args.mode == 'prune': args.save_folder = os.path.join(args.save_folder, str(args.target_sparsity)) if args.initial_sparsity != 0.0: args.load_folder = os.path.join(args.load_folder, str(args.initial_sparsity)) if args.save_folder and not os.path.isdir(args.save_folder): os.makedirs(args.save_folder) if args.log_path: set_logger(args.log_path) if args.pruning_ratio_to_acc_record_file and not os.path.isdir( args.pruning_ratio_to_acc_record_file.rsplit('/', 1)[0]): os.makedirs(args.pruning_ratio_to_acc_record_file.rsplit('/', 1)[0]) if not torch.cuda.is_available(): logging.info('no gpu device available') args.cuda = False torch.manual_seed(args.seed) if args.cuda: torch.cuda.manual_seed(args.seed) cudnn.benchmark = True # If set > 0, will resume training from a given checkpoint. resume_from_epoch = 0 resume_folder = args.load_folder for try_epoch in range(200, 0, -1): if os.path.exists( args.checkpoint_format.format(save_folder=resume_folder, epoch=try_epoch)): resume_from_epoch = try_epoch break if args.restore_epoch: resume_from_epoch = args.restore_epoch # Set default train and test path if not provided as input. utils.set_dataset_paths(args) if resume_from_epoch: filepath = args.checkpoint_format.format(save_folder=resume_folder, epoch=resume_from_epoch) checkpoint = torch.load(filepath) checkpoint_keys = checkpoint.keys() dataset_history = checkpoint['dataset_history'] dataset2num_classes = checkpoint['dataset2num_classes'] masks = checkpoint['masks'] shared_layer_info = checkpoint['shared_layer_info'] piggymask_floats = checkpoint['piggymask_floats'] piggymask_task_tags = checkpoint['piggymask_task_tags'] if 'num_for_construct' in checkpoint_keys: num_for_construct = checkpoint['num_for_construct'] if args.mode == 'inference' and 'network_width_multiplier' in shared_layer_info[ args.dataset]: # TODO, temporary solution args.network_width_multiplier = shared_layer_info[ args.dataset]['network_width_multiplier'] else: dataset_history = [] dataset2num_classes = {} masks = {} shared_layer_info = {} piggymask_floats = {} piggymask_task_tags = {} if args.baseline_acc_file is None or not os.path.isfile( args.baseline_acc_file): sys.exit(3) with open(args.baseline_acc_file, 'r') as jsonfile: json_data = json.load(jsonfile) baseline_acc = float(json_data[args.dataset]) if args.mode == 'prune' and not args.pruning_ratio_to_acc_record_file: sys.exit(-1) if args.arch == 'resnet50': num_for_construct = [ 64, 64, 64 * 4, 128, 128 * 4, 256, 256 * 4, 512, 512 * 4 ] model = models.__dict__[args.arch](pretrained=True, num_for_construct=num_for_construct, threshold=args.threshold) elif 'vgg' in args.arch: custom_cfg = [ 64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M' ] model = models.__dict__[args.arch]( custom_cfg, dataset_history=dataset_history, dataset2num_classes=dataset2num_classes, network_width_multiplier=args.network_width_multiplier, shared_layer_info=shared_layer_info, groups=int(args.network_width_multiplier)) else: print('Error!') sys.exit(1) # Add and set the model dataset. 
model.add_dataset(args.dataset, args.num_classes) model.set_dataset(args.dataset) model = nn.DataParallel(model) model = model.cuda() NEED_ADJUST_MASK = False task_id = model.module.datasets.index(args.dataset) + 1 if not masks: for name, module in model.module.named_modules(): if isinstance(module, nl.SharableConv2d) or isinstance( module, nl.SharableLinear): mask = torch.ByteTensor(module.weight.data.size()).fill_(0) if 'cuda' in module.weight.data.type(): mask = mask.cuda() masks[name] = mask module.packnet_mask = mask else: # when we expand network, we need to allocate new masks for name, module in model.module.named_modules(): if isinstance(module, nl.SharableConv2d): if masks[name].size(0) < module.weight.data.size(0): assert args.mode == 'finetune' NEED_ADJUST_MASK = True elif masks[name].size(0) > module.weight.data.size(0): assert args.mode == 'inference' NEED_ADJUST_MASK = True if NEED_ADJUST_MASK: if args.mode == 'finetune': for name, module in model.module.named_modules(): if isinstance(module, nl.SharableConv2d): mask = torch.ByteTensor( module.weight.data.size()).fill_(task_id) if 'cuda' in module.weight.data.type(): mask = mask.cuda() mask[:masks[name].size(0), :, :, :].copy_(masks[name]) masks[name] = mask elif isinstance(module, nl.SharableLinear): mask = torch.ByteTensor( module.weight.data.size()).fill_(task_id) if 'cuda' in module.weight.data.type(): mask = mask.cuda() mask[:masks[name].size(0), :masks[name].size(1)].copy_( masks[name]) masks[name] = mask elif args.mode == 'inference': for name, module in model.module.named_modules(): if isinstance(module, nl.SharableConv2d): mask = torch.ByteTensor( module.weight.data.size()).fill_(task_id) if 'cuda' in module.weight.data.type(): mask = mask.cuda() mask[:, :, :, :].copy_( masks[name][:mask.size(0), :, :, :]) masks[name] = mask elif isinstance(module, nl.SharableLinear): mask = torch.ByteTensor( module.weight.data.size()).fill_(task_id) if 'cuda' in module.weight.data.type(): mask = mask.cuda() mask[:, :].copy_( masks[name][:mask.size(0), :mask.size(1)]) masks[name] = mask for name, module in model.module.named_modules(): if isinstance(module, nl.SharableConv2d) or isinstance( module, nl.SharableLinear): module.packnet_mask = masks[name] if args.dataset not in shared_layer_info: shared_layer_info[args.dataset] = { 'bias': {}, 'bn_layer_running_mean': {}, 'bn_layer_running_var': {}, 'bn_layer_weight': {}, 'bn_layer_bias': {} } NEED_ADJUST_MASK = False if task_id == 1: for name, module in model.module.named_modules(): if isinstance(module, nl.SharableConv2d) or isinstance( module, nl.SharableLinear): module.inference_task_id = task_id elif task_id == 2 and not piggymask_floats: for name, module in model.module.named_modules(): if isinstance(module, nl.SharableConv2d) or isinstance( module, nl.SharableLinear): piggymask_floats[name] = torch.zeros_like(masks[name], dtype=torch.float32) piggymask_task_tags[name] = torch.zeros_like(masks[name]) piggymask_floats[name] = torch.where( masks[name] != 0, torch.full_like(piggymask_floats[name], 0.01), piggymask_floats[name]) piggymask_task_tags[name] = torch.where( masks[name] != 0, torch.full_like(piggymask_task_tags[name], task_id), piggymask_task_tags[name]) piggymask_floats[name] = Parameter(piggymask_floats[name]) module.piggymask_float = piggymask_floats[name] module.piggymask_task_tag = piggymask_task_tags[name] module.inference_task_id = task_id elif task_id >= 2: # when we expand network, we need to allocate new piggymasks for name, module in model.module.named_modules(): 
if isinstance(module, nl.SharableConv2d): if piggymask_floats[name].size(0) < module.weight.data.size(0): assert args.mode == 'finetune' NEED_ADJUST_MASK = True elif piggymask_floats[name].size(0) > module.weight.data.size( 0): assert args.mode == 'inference' NEED_ADJUST_MASK = True if NEED_ADJUST_MASK: if args.mode == 'finetune': for name, module in model.module.named_modules(): if isinstance(module, nl.SharableConv2d): piggymask_float = torch.zeros_like(masks[name], dtype=torch.float32) piggymask_task_tag = torch.zeros_like(masks[name]) piggymask_float[:piggymask_floats[name]. size(0), :, :, :].copy_( piggymask_floats[name]) piggymask_task_tag[:piggymask_task_tags[name]. size(0), :, :, :].copy_( piggymask_task_tags[name]) piggymask_floats[name] = Parameter(piggymask_float) piggymask_task_tags[name] = piggymask_task_tag elif isinstance(module, nl.SharableLinear): piggymask_float = torch.zeros_like(masks[name], dtype=torch.float32) piggymask_task_tag = torch.zeros_like(masks[name]) piggymask_float[:piggymask_floats[name].size(0), : piggymask_floats[name].size(1)].copy_( piggymask_floats[name]) piggymask_task_tag[:piggymask_task_tags[name].size( 0), :piggymask_task_tags[name].size(1)].copy_( piggymask_task_tags[name]) piggymask_floats[name] = Parameter(piggymask_float) piggymask_task_tags[name] = piggymask_task_tag elif args.mode == 'inference': for name, module in model.module.named_modules(): if isinstance(module, nl.SharableConv2d): piggymask_float = torch.zeros_like(masks[name], dtype=torch.float32) piggymask_task_tag = torch.zeros_like(masks[name]) piggymask_float[:, :, :, :].copy_( piggymask_floats[name] [:piggymask_float.size(0), :, :, :]) piggymask_floats[name] = Parameter(piggymask_float) piggymask_task_tag[:, :, :, :].copy_( piggymask_task_tags[name] [:piggymask_task_tag.size(0), :, :, :]) piggymask_task_tags[name] = piggymask_task_tag elif isinstance(module, nl.SharableLinear): piggymask_float = torch.zeros_like(masks[name], dtype=torch.float32) piggymask_task_tag = torch.zeros_like(masks[name]) piggymask_float[:, :].copy_( piggymask_floats[name][:piggymask_float.size(0), : piggymask_float.size(1)]) piggymask_floats[name] = Parameter(piggymask_float) piggymask_task_tag[:, :].copy_( piggymask_task_tags[name][:piggymask_task_tag.size( 0), :piggymask_task_tag.size(1)]) piggymask_task_tags[name] = piggymask_task_tag for name, module in model.module.named_modules(): if isinstance(module, nl.SharableConv2d) or isinstance( module, nl.SharableLinear): if args.mode == 'finetune' and not args.finetune_again: piggymask_task_tags[name].data[ piggymask_task_tags[name].data.eq(0) & (masks[name] != 0)] = task_id piggymask_floats[name].data[ piggymask_task_tags[name].data.eq(task_id)] = 0.01 module.piggymask_float = piggymask_floats[name] module.piggymask_task_tag = piggymask_task_tags[name] module.inference_task_id = task_id shared_layer_info[args.dataset][ 'network_width_multiplier'] = args.network_width_multiplier if args.num_classes == 2: train_loader = dataset.cifar100_train_loader_two_class( args.dataset, args.batch_size) val_loader = dataset.cifar100_val_loader_two_class( args.dataset, args.val_batch_size) elif args.num_classes == 5: train_loader = dataset.cifar100_train_loader(args.dataset, args.batch_size) val_loader = dataset.cifar100_val_loader(args.dataset, args.val_batch_size) else: print("num_classes should be either 2 or 5") sys.exit(1) # if we are going to save checkpoint in other folder, then we recalculate the starting epoch if args.save_folder != args.load_folder: start_epoch 
= 0 else: start_epoch = resume_from_epoch curr_prune_step = begin_prune_step = start_epoch * len(train_loader) end_prune_step = curr_prune_step + args.pruning_interval * len( train_loader) manager = Manager(args, model, shared_layer_info, masks, train_loader, val_loader, begin_prune_step, end_prune_step) if args.mode == 'inference': manager.load_checkpoint_only_for_evaluate(resume_from_epoch, resume_folder) manager.validate(resume_from_epoch - 1) return # manager.inference_dataset_idx lr = args.lr lr_mask = args.lr_mask # update all layers named_params = dict(model.named_parameters()) params_to_optimize_via_SGD = [] named_of_params_to_optimize_via_SGD = [] masks_to_optimize_via_Adam = [] named_of_masks_to_optimize_via_Adam = [] for name, param in named_params.items(): if 'classifiers' in name: if '.{}.'.format(model.module.datasets.index( args.dataset)) in name: params_to_optimize_via_SGD.append(param) named_of_params_to_optimize_via_SGD.append(name) continue elif 'piggymask' in name: masks_to_optimize_via_Adam.append(param) named_of_masks_to_optimize_via_Adam.append(name) else: params_to_optimize_via_SGD.append(param) named_of_params_to_optimize_via_SGD.append(name) optimizer_network = optim.SGD(params_to_optimize_via_SGD, lr=lr, weight_decay=0.0, momentum=0.9, nesterov=True) optimizers = Optimizers() optimizers.add(optimizer_network, lr) if masks_to_optimize_via_Adam: optimizer_mask = optim.Adam(masks_to_optimize_via_Adam, lr=lr_mask) optimizers.add(optimizer_mask, lr_mask) manager.load_checkpoint(optimizers, resume_from_epoch, resume_folder, NEED_ADJUST_MASK) # k = int(args.network_width_multiplier) # assert k >= 2 # for name, module in model.module.named_modules(): # if isinstance(module, nl.SharableConv2d): # n = len(module.weight) # n = int((n // k * (k-1)) * 0.1) # # module.weight.data[:n, :, :, :] = 0.0 # module.packnet_mask[:n, :, :, :] = 255 # if isinstance(module, nl.SharableLinear): # n = len(module.bias) # n = int((n // k * (k-1)) * 0.1) # # module.weight.data[:n, :] = 0.0 # # module.bias.data[:n] = 0.0 # module.packnet_mask[:n, :] = 255 # if isinstance(module, nn.BatchNorm2d): # n = len(module.weight) # n = int((n // k * (k-1)) * 0.1) # # module.weight.data[:n] = 0.0 """Performs training.""" curr_lrs = [] for optimizer in optimizers: for param_group in optimizer.param_groups: curr_lrs.append(param_group['lr']) break if args.mode == 'prune': if 'gradual_prune' in args.load_folder and args.save_folder == args.load_folder: args.epochs = 20 + resume_from_epoch logging.info('') logging.info('Before pruning: ') logging.info('Sparsity range: {} -> {}'.format(args.initial_sparsity, args.target_sparsity)) must_pruning_ratio_for_curr_task = 0.0 json_data = {} if os.path.isfile(args.pruning_ratio_to_acc_record_file): with open(args.pruning_ratio_to_acc_record_file, 'r') as json_file: json_data = json.load(json_file) if args.network_width_multiplier == args.max_allowed_network_width_multiplier and json_data[ '0.0'] < baseline_acc: # if we reach the upperbound and still do not get the accuracy over our target on curr task, we still do pruning logging.info( 'we reach the upperbound and still do not get the accuracy over our target on curr task' ) remain_num_tasks = args.total_num_tasks - len(dataset_history) logging.info('remain_num_tasks: {}'.format(remain_num_tasks)) ratio_allow_for_curr_task = round(1.0 / (remain_num_tasks + 1), 1) logging.info('ratio_allow_for_curr_task: {:.4f}'.format( ratio_allow_for_curr_task)) must_pruning_ratio_for_curr_task = 1.0 - ratio_allow_for_curr_task if 
args.initial_sparsity >= must_pruning_ratio_for_curr_task: sys.exit(6) manager.validate(start_epoch - 1) logging.info('') elif args.mode == 'finetune': if not args.finetune_again: manager.pruner.make_finetuning_mask() logging.info('Finetune stage...') else: logging.info('Piggymask Retrain...') history_best_avg_val_acc_when_retraining = manager.validate( start_epoch - 1) num_epochs_that_criterion_does_not_get_better = 0 stop_lr_mask = True if manager.pruner.calculate_curr_task_ratio() == 0.0: logging.info( 'There is no left space in convolutional layer for curr task' ', we will try to use prior experience as long as possible') stop_lr_mask = False for epoch_idx in range(start_epoch, args.epochs): avg_train_acc, curr_prune_step = manager.train(optimizers, epoch_idx, curr_lrs, curr_prune_step) avg_val_acc = manager.validate(epoch_idx) # if args.mode == 'prune' and (epoch_idx+1) >= (args.pruning_interval + start_epoch) and ( # avg_val_acc > history_best_avg_val_acc_when_prune): # pass if args.finetune_again: if avg_val_acc > history_best_avg_val_acc_when_retraining: history_best_avg_val_acc_when_retraining = avg_val_acc num_epochs_that_criterion_does_not_get_better = 0 if args.save_folder is not None: for path in os.listdir(args.save_folder): if '.pth.tar' in path: os.remove(os.path.join(args.save_folder, path)) else: print('Something is wrong! Block the program with pdb') pdb.set_trace() history_best_avg_val_acc = avg_val_acc manager.save_checkpoint(optimizers, epoch_idx, args.save_folder) else: num_epochs_that_criterion_does_not_get_better += 1 if args.finetune_again and num_epochs_that_criterion_does_not_get_better == 5: logging.info("stop retraining") sys.exit(0) if args.mode == 'finetune': if epoch_idx + 1 == 50 or epoch_idx + 1 == 80: for param_group in optimizers[0].param_groups: param_group['lr'] *= 0.1 curr_lrs[0] = param_group['lr'] if len(optimizers.lrs) == 2: if epoch_idx + 1 == 50: for param_group in optimizers[1].param_groups: param_group['lr'] *= 0.2 if stop_lr_mask and epoch_idx + 1 == 70: for param_group in optimizers[1].param_groups: param_group['lr'] *= 0.0 curr_lrs[1] = param_group['lr'] if args.save_folder is not None: pass # paths = os.listdir(args.save_folder) # if paths and '.pth.tar' in paths[0]: # for checkpoint_file in paths: # os.remove(os.path.join(args.save_folder, checkpoint_file)) else: print('Something is wrong! 
Block the program with pdb') if task_id >= 2: for name, module in model.module.named_modules(): if isinstance(module, nl.SharableConv2d) or isinstance( module, nl.SharableLinear): if args.mode == 'finetune': module.piggymask_task_tag[module.piggymask_float.le( 0.005)] = 0 if avg_train_acc > 0.95: manager.save_checkpoint(optimizers, epoch_idx, args.save_folder) logging.info('-' * 16) if args.pruning_ratio_to_acc_record_file: json_data = {} if os.path.isfile(args.pruning_ratio_to_acc_record_file): with open(args.pruning_ratio_to_acc_record_file, 'r') as json_file: json_data = json.load(json_file) if args.mode == 'finetune' and not args.test_piggymask: json_data[0.0] = round(avg_val_acc, 4) with open(args.pruning_ratio_to_acc_record_file, 'w') as json_file: json.dump(json_data, json_file) if avg_train_acc > 0.95 and avg_val_acc >= baseline_acc: pass else: logging.info("It's time to expand the Network") logging.info('Auto expand network') sys.exit(2) if manager.pruner.calculate_curr_task_ratio() == 0.0: logging.info( 'There is no left space in convolutional layer for curr task, so needless to prune' ) sys.exit(5) elif args.mode == 'prune': if avg_train_acc > 0.95: json_data[args.target_sparsity] = round(avg_val_acc, 4) with open(args.pruning_ratio_to_acc_record_file, 'w') as json_file: json.dump(json_data, json_file) else: sys.exit(6) must_pruning_ratio_for_curr_task = 0.0 if args.network_width_multiplier == args.max_allowed_network_width_multiplier and json_data[ '0.0'] < baseline_acc: # if we reach the upperbound and still do not get the accuracy over our target on curr task, we still do pruning logging.info( 'we reach the upperbound and still do not get the accuracy over our target on curr task' ) remain_num_tasks = args.total_num_tasks - len(dataset_history) logging.info('remain_num_tasks: {}'.format(remain_num_tasks)) ratio_allow_for_curr_task = round(1.0 / (remain_num_tasks + 1), 1) logging.info('ratio_allow_for_curr_task: {:.4f}'.format( ratio_allow_for_curr_task)) must_pruning_ratio_for_curr_task = 1.0 - ratio_allow_for_curr_task if args.target_sparsity >= must_pruning_ratio_for_curr_task: sys.exit(6)
def objective(params, GENERATOR_ID): # define the model print params depth, width = params learning_rate = 1.0e-3 decay_rate = 0.0e-6 class ResBlock(nn.Module): def __init__(self, NumChannels): super(ResBlock, self).__init__() self.conv0 = nn.Conv3d(NumChannels, NumChannels, 3, stride=1, padding=1) #self.bn0 = nn.BatchNorm3d(NumChannels) self.conv1 = nn.Conv3d(NumChannels, NumChannels, 3, stride=1, padding=1) #self.bn1 = nn.BatchNorm3d(NumChannels) self.selu0 = nn.SELU() self.selu1 = nn.SELU() def forward(self, x): #y = self.bn0(x) y = self.conv0(x) #y = self.bn1(y) y = self.selu0(y) y = self.conv1(y) return self.selu1(torch.add(y, x)) class ResNet(nn.Module): def __init__(self): super(ResNet, self).__init__() self.conv0 = nn.Conv3d(1, 32, 3, stride=1, padding=1) #self.norm0 = nn.BatchNorm3d(64) #self.conv00 = nn.Conv3d(64, 64, 3, stride=1, padding=1) self.conv1 = nn.Conv3d(32, 64, 3, stride=2) #self.norm1 = nn.BatchNorm3d(96) #self.conv11 = nn.Conv3d(96, 96, 3, stride=1, padding=1) self.conv2 = nn.Conv3d(64, 96, 3, stride=2, padding=1) #self.norm2 = nn.BatchNorm3d(128) #self.conv22 = nn.Conv3d(128, 128, 3, stride=1, padding=1) self.conv3 = nn.Conv3d(96, 128, 3, stride=2, padding=1) #self.norm3 = nn.BatchNorm3d(192) #self.conv33 = nn.Conv3d(192, 192, 3, stride=1, padding=1) self.conv4 = nn.Conv3d(128, 192, 3, stride=1) #self.norm4 = nn.BatchNorm3d(width) #self.conv5 = nn.Conv3d(width, width, 3, stride=1) self.fc1 = nn.Linear(width, width) self.fc2 = nn.Linear(width, 2) #self.norm = nn.BatchNorm1d(width) self.selu0 = nn.SELU() self.selu1 = nn.SELU() self.selu2 = nn.SELU() self.selu3 = nn.SELU() self.selu4 = nn.SELU() self.selu5 = nn.SELU() self.selu6 = nn.SELU() self.block0 = self.build_layer(4, 32) self.block1 = self.build_layer(4, 64) self.block2 = self.build_layer(4, 96) def build_layer(self, NumLayers, NumChannels): layers = [] for _ in range(NumLayers): layers.append(ResBlock(NumChannels)) return nn.Sequential(*layers) def forward(self, x): x = x.view(-1, 1, 25, 25, 25) x = self.selu0(x) x = self.conv0(x) x = self.selu1(x) x = self.block0(x) x = self.conv1(x) x = self.selu2(x) x = self.block1(x) x = self.conv2(x) x = self.selu3(x) #x = self.rselu(self.norm2(x)) x = self.block2(x) x = self.conv3(x) x = self.selu4(x) #x = self.rselu(self.norm3(x)) x = self.conv4(x) x = self.selu5(x) x = x.view(-1, width) #x = self.norm(x) x = self.fc1(x) #x = self.norm(x) x = self.selu6(x) x = self.fc2(x) #x = self.softmax(x) return x from torch import load net = nn.DataParallel(ResNet(), device_ids=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) #net = ResNet() # load previous model #net.load_state_dict(load("test/savedmodel_depth_15-width_192")) net.cuda() import torch.optim as optim from torch.optim.lr_scheduler import ReduceLROnPlateau net.train() criterion = nn.CrossEntropyLoss().cuda() #optimizer = optim.SGD(net.parameters(), lr=learning_rate, weight_decay=decay_rate, momentum=0.9) optimizer = optim.Adam(net.parameters(), lr=learning_rate, weight_decay=decay_rate) scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True, min_lr=1.0e-3, patience=10, factor=0.1, threshold=1.0e-4) loss_history = [] epoch_end_relative_error_history = [] epoch_num = 50 #import pdb;pdb.set_trace() net.train() # main process for training prev_val_loss = 0.0 stag_break_count = 0 early_stop_count = 0 prev_epoch_end_val_loss = 0.0 epoch_end_val_loss = 0.0 #train_generator.start() train_loader = train_generator.generators[GENERATOR_ID].generate() #val_generator.start() val_loader = val_generator.generators[GENERATOR_ID].generate() 
    running_loss = 0.0
    val_loss = 0.0
    i = 0
    for train_data, val_data in zip(train_loader, val_loader):
        #inputs, labels = data
        #ECAL, HCAL, labels = Variable(inputs[0].cuda()), Variable(inputs[1].cuda()), Variable(labels.cuda())
        net.train()
        ECAL, _, labels = train_data
        ECAL = np.swapaxes(ECAL, 1, 3)
        ECAL, labels = Variable(from_numpy(ECAL).cuda()), Variable(
            from_numpy(labels).long().cuda())
        optimizer.zero_grad()
        ECAL = 50000.0 * ECAL
        outputs = net(ECAL)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        net.eval()
        ECAL, _, labels = val_data
        ECAL = np.swapaxes(ECAL, 1, 3)
        ECAL, labels = Variable(from_numpy(ECAL).cuda()), Variable(
            from_numpy(labels).long().cuda())
        ECAL = 50000.0 * ECAL
        val_outputs = net(ECAL)
        validation_loss = criterion(val_outputs, labels)
        val_loss += validation_loss.item()

        if i % 20 == 19:
            running_loss /= 20
            val_loss /= 20
            print('[%d, %5d, %5d] loss: %.10f' %
                  (GENERATOR_ID, i / 4000 + 1, i % 4000 + 1, running_loss), end='')
            print(' val loss: %.10f' % (val_loss), end='')
            relative_error = (val_loss - prev_val_loss) / float(val_loss)
            print(' relative error: %.10f' % (relative_error), end='')
            if (val_loss < 0.10):
                break
            scheduler.step(val_loss)
            if (relative_error > 0.01 and i != 0):
                early_stop_count += 1
                if (early_stop_count > 5):
                    break
            else:
                early_stop_count = 0
            print(' early stop count: %d' % (early_stop_count))
            loss_history.append([
                GENERATOR_ID, i / 4000 + 1, i % 4000 + 1, running_loss,
                val_loss, relative_error, early_stop_count
            ])
            # if(i % 400==399):
            #     epoch_end_val_loss = val_loss
            #     epoch_end_relative_error = (epoch_end_val_loss-prev_epoch_end_val_loss)/float(epoch_end_val_loss)
            #     print('[%d] epoch_end_relative_error: %.10f' %
            #           (GENERATOR_ID, epoch_end_relative_error)),
            #     epoch_end_relative_error_history.append([GENERATOR_ID, i/4000 + 1, i%4000 + 1, epoch_end_relative_error])
            #     if(epoch_end_relative_error > -0.005 and i/4000!=0):
            #         stag_break_count+=1
            #         if(stag_break_count>0):
            #             break
            #     else:
            #         stag_break_count=0
            #     print(' stag_break_count: %d' %
            #           (stag_break_count))
            #     prev_epoch_end_val_loss = epoch_end_val_loss
            # prev_val_loss = val_loss
            running_loss = 0.0
            val_loss = 0.0
        i += 1
    #train_generator.hard_stop()
    #val_generator.hard_stop()
    #break;

    loss_history = np.array(loss_history)
    epoch_end_relative_error_history = np.array(
        epoch_end_relative_error_history)
    with h5py.File(
            OutPath + "loss_history-depth_" + str(depth) + "-width_" +
            str(width) + ".h5", 'w') as loss_file, h5py.File(
                OutPath + "epoch_end_relative_error_history-depth_" +
                str(depth) + "-width_" + str(width) + ".h5",
                'w') as epoch_end_relative_error_history_file:
        loss_file.create_dataset("loss", data=loss_history)
        epoch_end_relative_error_history_file.create_dataset(
            "relative_error", data=epoch_end_relative_error_history)
    from torch import save
    save(net.state_dict(),
         OutPath + "savedmodel_depth_" + str(depth) + "-width_" + str(width))
    print('Finished Training')

    # Analysis
    from torch import max
    correct = 0
    total = 0
    #test_generator.start()
    test_loader = test_generator.generators[GENERATOR_ID].generate()
    test_count = 0
    for test_index, test_data in enumerate(test_loader):
        test_count += 1
        #images, labels = data
        #ECAL, HCAL, labels = Variable(images[0].cuda()), Variable(images[1].cuda()), labels.cuda()
        #outputs = net(ECAL, HCAL)
        ECAL, _, labels = test_data
        ECAL = np.swapaxes(ECAL, 1, 3)
        ECAL, labels = Variable(
            from_numpy(ECAL).cuda()), from_numpy(labels).long().cuda()
        ECAL = 50000.0 * ECAL
        outputs = net(ECAL)
        _, predicted = max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()
        if (test_count >= 300):
            break
    #test_generator.hard_stop()
    print('Accuracy of the network on test images: %f %%' %
          (100 * float(correct) / total))
    return (float(correct) / total) * 100.0
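# --- Illustrative sketch (editor's addition, not part of the original snippet) ---
# The objective() above saves net.state_dict() while net is still wrapped in
# nn.DataParallel, so every checkpoint key carries a "module." prefix. A minimal,
# hedged sketch of two common ways to keep such checkpoints loadable into an
# unwrapped model; the helper names are hypothetical, not from the original code.
import torch
import torch.nn as nn

def save_unwrapped(net, path):
    # Saving the inner module's state_dict avoids the "module." prefix entirely.
    to_save = net.module if isinstance(net, nn.DataParallel) else net
    torch.save(to_save.state_dict(), path)

def load_into_plain_model(model, path):
    # Alternatively, strip the prefix when loading an already-prefixed checkpoint.
    state = torch.load(path, map_location="cpu")
    state = {k.replace("module.", "", 1): v for k, v in state.items()}
    model.load_state_dict(state)
    return model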
def visual(left, right, disp): args = parser.parse_args() args.cuda = not args.no_cuda and torch.cuda.is_available() if args.savemodel == "": timestr = datetime.now().isoformat().replace(':','-').replace('.','MS') args.savemodel = timestr savepath = os.path.join(args.savedir, args.savemodel) if not os.path.exists(savepath): os.makedirs(savepath) log_file = os.path.join(savepath, 'run.log') logger = logging.getLogger('FS') logger.setLevel(logging.INFO) formatter = logging.Formatter('%(asctime)s - %(filename)s - %(lineno)s: %(message)s') fh = logging.StreamHandler(sys.stderr) fh.setLevel(logging.INFO) fh.setFormatter(formatter) logger.addHandler(fh) fh = logging.FileHandler(log_file) fh.setLevel(logging.INFO) fh.setFormatter(formatter) logger.addHandler(fh) for k,v in sorted(vars(args).items()): logger.info('%s - %s' % (k, v)) if args.seed != 0: torch.manual_seed(args.seed) if args.cuda: torch.cuda.manual_seed(args.seed) model = get_model(args) if args.cuda: model = nn.DataParallel(model) model.cuda() if args.loadmodel is not None: state_dict = torch.load(os.path.join(args.savedir, args.loadmodel, "max_loss.tar")) model.load_state_dict(state_dict['state_dict']) logger.info('Number of model parameters: {}'.format(sum([p.data.nelement() for p in model.parameters()]))) all_time = 0. if args.dataset == "kitti": imgL = np.array(Image.open(left)).astype('float32') imgR = np.array(Image.open(right)).astype('float32') # pad to (384, 1248) top_pad = 384-imgL.shape[0] left_pad = 1248-imgL.shape[1] elif args.dataset == "middlebury": imgL = Image.open(left) imgR = Image.open(right) w, h = imgL.size imgL = imgL.resize((w // args.down_sample, h // args.down_sample), Image.ANTIALIAS) imgR = imgR.resize((w // args.down_sample, h // args.down_sample), Image.ANTIALIAS) w, h = imgL.size top_pad = 32 - h % 32 left_pad = 32 - w % 32 imgL = np.array(imgL) imgR = np.array(imgR) imgL = np.lib.pad(imgL,((top_pad,0),(0,left_pad),(0,0)),mode='constant',constant_values=0) imgR = np.lib.pad(imgR,((top_pad,0),(0,left_pad),(0,0)),mode='constant',constant_values=0) imgL = np.reshape(imgL, [1,imgL.shape[0],imgL.shape[1],3]) imgR = np.reshape(imgR, [1,imgR.shape[0],imgR.shape[1],3]) start_time = time.time() pred_disps = infer(model, args, imgL, imgR) end_time = time.time() - start_time all_time += end_time with open(disp, 'rb') as f: disp_img = Image.fromarray(readPFM(f).astype(np.float32)) w, h = disp_img.size disp_img = disp_img.resize((w // args.down_sample, h // args.down_sample), Image.ANTIALIAS) disp = np.array(disp_img).astype(np.float32) / 2 for index, pred_disp in enumerate(pred_disps): img = pred_disp[top_pad:,:-left_pad] error_map = convert_illum(np.abs(disp - img), args.maxdisp) disp_map = convert_color(img, args.maxdisp) final = np.maximum(error_map, disp_map) Image.fromarray(final).save(os.path.join(savepath, str(index) + '_' + left.split('/')[-1]))
def detect_image(image):
    args = args_parse()
    iou = args.iou
    model_image_size = args.image_size  # input image size
    model_path = args.model_path  # model path
    confidence = args.confidence  # confidence threshold
    anchors = args.anchors_path
    anchors = get_anchors(anchors)  # anchor boxes
    class_names = args.classes_path
    class_names = get_classes(class_names)  # class names

    net = YoloBody(len(anchors[0]), len(class_names))
    net.eval()
    print('Loading weights into state dict...')
    net.load_state_dict(torch.load(model_path, map_location="cuda:0"))
    cuda = True
    if cuda:
        os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        net = nn.DataParallel(net)
        net = net.to(device)
    print('Finished!')

    yolo_decodes = []
    for i in range(3):
        yolo_decodes.append(
            DecodeBox(anchors[i], len(class_names),
                      (model_image_size[1], model_image_size[0])))
    print('{} model, anchors, and classes loaded.'.format(model_path))

    # assign a different color to each class for drawing boxes
    hsv_tuples = [(x / len(class_names), 1., 1.)
                  for x in range(len(class_names))]
    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    colors = list(
        map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
            colors))

    image_shape = np.array(np.shape(image)[0:2])
    img = Image.fromarray(cv2.cvtColor(
        image, cv2.COLOR_BGR2RGB))  # convert the OpenCV (BGR) image to a PIL (RGB) image
    # letterbox_image resizes the original image without distortion and pads it
    # with gray bars so that it matches the network input size
    crop_img = np.array(
        letterbox_image(
            img, (args.image_size[0], args.image_size[1])))  # OpenCV-based input path
    # crop_img = np.array(letterbox_image(image, (args.image_size[0], args.image_size[1])))  # PIL-based input path
    photo = np.array(crop_img, dtype=np.float32)  # convert to a numpy array
    photo = photo.astype(
        np.float32) / 255.0  # normalize pixel values from (0~255) to (0~1)
    photo = np.transpose(photo, (2, 0, 1))  # move the channel dimension in front of height and width
    # PyTorch expects a batch dimension: (batch_size, channels, height, width)
    images = []
    images.append(photo)  # add a batch dimension
    images = np.asarray(images)  # stack the list into a batched array

    with torch.no_grad():
        images = torch.from_numpy(images)
        images = images.cuda()  # move to GPU
        outputs = net(images)  # run the model to get raw predictions
        output_list = []
        for i in range(3):  # 3 effective feature levels
            output_list.append(yolo_decodes[i](
                outputs[i]))  # decode the priors into final boxes, objectness and per-class scores
        output = torch.cat(
            output_list, 1)  # concatenate the decoded tensors along dim=1
        batch_detections = non_max_suppression(
            output,
            len(class_names),  # keep only the highest-scoring box in each region
            conf_thres=confidence,
            nms_thres=iou)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except:
            return image

    top_index = batch_detections[:, 4] * batch_detections[:, 5] > confidence  # objectness * class score
    top_conf = batch_detections[top_index, 4] * batch_detections[top_index, 5]  # final confidence
    top_label = np.array(batch_detections[top_index, 6], np.int32)  # final class indices
    top_bboxes = np.array(batch_detections[top_index, :4])  # final bounding boxes
    top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:, 0], -1), \
        np.expand_dims(top_bboxes[:, 1], -1), \
        np.expand_dims(top_bboxes[:, 2], -1), \
        np.expand_dims(top_bboxes[:, 3], -1)  # expand dimensions

    # the detections are in letterboxed coordinates; remove the gray padding offset
    boxes = yolo_correct_boxes(
        top_ymin, top_xmin, top_ymax, top_xmax,
        np.array([model_image_size[0], model_image_size[1]]), image_shape)

    font = ImageFont.truetype(font='model_data/simhei.ttf',
                              size=np.floor(3e-2 * np.shape(image)[1] +
                                            0.5).astype('int32'))
    thickness = (np.shape(image)[0] + np.shape(image)[1]) // model_image_size[0]

    for i, c in enumerate(top_label):  # final classes
        predicted_class = class_names[c]  # class name
        score = top_conf[i]  # score
        text = '{} {:.2f}'.format(predicted_class, score)
        print(text)  # print class and score

        top, left, bottom, right = boxes[i]
        top = top - 5  # expand the box by 5 pixels on each side
        left = left - 5
        bottom = bottom + 5
        right = right + 5
        rgb = colors[class_names.index(predicted_class)]
        image = cv2.putText(image, text, (int(left + i), int(top - i)),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.1, rgb, 2)
        # image / text / top-left corner / font / font scale / color / thickness
        # for i in range(thickness):
        image = cv2.rectangle(image, (int(left + i), int(top)),
                              (int(right), int(bottom)), rgb, 2)  # rectangle
        # image, bottom-left corner, top-right corner, color, line thickness

        # # draw the box with PIL instead
        # label = '{} {:.2f}'.format(predicted_class, score)
        # draw = ImageDraw.Draw(image)
        # label_size = draw.textsize(label, font)
        # label = label.encode('utf-8')
        # print(label)
        #
        # if top - label_size[1] >= 0:
        #     text_origin = np.array([left, top - label_size[1]])
        # else:
        #     text_origin = np.array([left, top + 1])
        #
        # for i in range(thickness):
        #     draw.rectangle(
        #         [left + i, top + i, right - i, bottom - i],  # box rectangle
        #         outline=colors[class_names.index(predicted_class)])
        # draw.rectangle(
        #     [tuple(text_origin), tuple(text_origin + label_size)],
        #     fill=colors[class_names.index(predicted_class)])
        # draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
        # del draw

    return image
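# --- Illustrative sketch (editor's addition, not part of the original snippet) ---
# detect_image() relies on an external letterbox_image() helper that is not
# shown here. A minimal, hedged sketch of what such a helper typically does
# (aspect-preserving resize plus gray padding); this is an assumption about its
# behaviour, and the function name below is hypothetical.
from PIL import Image

def letterbox_image_sketch(image, size):
    """Resize a PIL image to `size` keeping aspect ratio, padding with gray."""
    iw, ih = image.size
    w, h = size
    scale = min(w / iw, h / ih)
    nw, nh = int(iw * scale), int(ih * scale)
    resized = image.resize((nw, nh), Image.BICUBIC)
    canvas = Image.new('RGB', size, (128, 128, 128))  # gray padding
    canvas.paste(resized, ((w - nw) // 2, (h - nh) // 2))
    return canvas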
ids = ids.to(DEVICE, dtype=torch.long) token_type_ids = token_type_ids.to(DEVICE, dtype=torch.long) mask = mask.to(DEVICE, dtype=torch.long) outputs = model(ids=ids, mask=mask, token_type_ids=token_type_ids) outputs = torch.sigmoid(outputs).cpu().detach().numpy() return outputs[0][0] @app.route("/predict") def predict(): sentence = request.args.get("sentence") positive_prediction = sentence_prediction(sentence, model=MODEL) negative_prediction = 1 - positive_prediction response = {} response["response"] = { 'positive': str(positive_prediction), 'negative': str(negative_prediction), 'sentence': str(sentence) } return flask.jsonify(response) if __name__ == "__main__": MODEL = BERTBaseUncased(bert_path, num_output) MODEL = nn.DataParallel(MODEL) MODEL.load_state_dict(torch.load(config.MODEL_PATH)) MODEL.to(DEVICE) MODEL.eval() app.run()
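# --- Illustrative sketch (editor's addition, not part of the original snippet) ---
# The ids / mask / token_type_ids fed to the model above come from a tokenizer
# that is not shown in this excerpt. A hedged sketch of how such tensors are
# typically produced with the Hugging Face tokenizer; MAX_LEN and the tokenizer
# name are assumptions, not values taken from the original code.
import torch
from transformers import BertTokenizer

MAX_LEN = 128  # assumed maximum sequence length
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

def encode_sentence(sentence):
    enc = tokenizer.encode_plus(sentence,
                                max_length=MAX_LEN,
                                padding="max_length",
                                truncation=True,
                                return_tensors="pt")
    # matches the tensor names used by the model call above
    return enc["input_ids"], enc["attention_mask"], enc["token_type_ids"]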
nn.BatchNorm2d(ngf), nn.ReLU(True), # state size. (ngf) x 32 x 32 nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False), nn.Tanh() # state size. (nc) x 64 x 64 ) def forward(self, input): return self.main(input) # Create the generator netG = Generator(ngpu).to(device) if (device.type == 'cuda') and (ngpu > 1): netG = nn.DataParallel(netG, list(range(ngpu))) # Apply the weights_init function to randomly initialize all weights # to mean=0, stdev=0.2. netG.apply(weights_init) # Print the model print(netG) # nc : number channel, ndf : number discriminator channel class Discriminator(nn.Module): def __init__(self, ngpu): super(Discriminator, self).__init__() self.ngpu = ngpu self.main = nn.Sequential(
warmup=args.lr_warmup, t_total=n_updates_total, b1=args.b1, b2=args.b2, e=args.e, l2=args.l2, vector_l2=args.vector_l2, max_grad_norm=args.max_grad_norm) compute_loss_fct = MultipleChoiceLossCompute(criterion, criterion, args.lm_coef, model_opt) load_openai_pretrained_model(dh_model.transformer, n_ctx=n_ctx, n_special=n_special) dh_model.to(device) dh_model = nn.DataParallel(dh_model) n_updates = 0 n_epochs = 0 if dataset != 'stsb': trYt = trY if submit: path = os.path.join(save_dir, desc, 'best_params') torch.save(dh_model.state_dict(), make_path(path)) best_score = 0 for i in range(args.n_iter): print("running epoch", i) run_epoch() n_epochs += 1 log(save_dir, desc) if submit:
args.n_all_param = sum([p.nelement() for p in model.parameters()]) args.n_nonemb_param = sum([p.nelement() for p in model.layers.parameters()]) print("total params: {}".format(args.n_all_param)) print("total non-emb params: {}".format(args.n_nonemb_param)) if args.fp16: model = model.half() if args.multi_gpu: model = model.to(device) if args.gpu0_bsz >= 0: para_model = BalancedDataParallel(args.gpu0_bsz // args.batch_chunk, model, dim=1).to(device) else: para_model = nn.DataParallel(model, dim=1).to(device) else: para_model = model.to(device) #### optimizer if args.optim.lower() == 'sgd': if args.sample_softmax > 0: dense_params, sparse_params = [], [] for param in model.parameters(): if param.size() == model.word_emb.weight.size(): sparse_params.append(param) else: dense_params.append(param) optimizer_sparse = optim.SGD(sparse_params, lr=args.lr * 2) optimizer = optim.SGD(dense_params, lr=args.lr, momentum=args.mom) else:
# to(device) move nn to the device netG = Generator(args["gpu"]).to(device) netG.apply(weights_init) print(netG) netD = Discriminator(args["gpu"]).to(device) netD.apply(weights_init) print(netD) # netG.load_state_dict(torch.load(os.path.join(args["checkpoint_dir"],"generator.pkl"))) # netD.load_state_dict(torch.load(os.path.join(args["checkpoint_dir"],"discriminator.pkl"))) # if args["load"] == True: # netG.eval() # netD.eval() if (device.type == "cuda") and (args["gpu"] > 1): netD = nn.DataParallel(netD, list(range(args["gpu"]))) # criterion = nn.BCELoss() fixed_noise = torch.randn(64, 100, 1, 1, device=device) real_label = 1 fake_label = 0 optimizerD = optim.Adam(netD.parameters(), lr=args["lr"], betas=(args["beta"], 0.999)) optimizerG = optim.Adam(netG.parameters(), lr=args["lr"], betas=(args["beta"], 0.999)) # optimizerD = optim.RMSprop(netD.parameters(), lr=args["lr"], alpha=0.9) # optimizerG = optim.RMSprop(netG.parameters(), lr=args["lr"], alpha=0.9) img_list = []
only_validate = False # from visdom import Visdom vis = Visdom(server='http://127.0.0.1', port=8097) # =================== config for model and dataset ===================================================================== from squid.data import Photo2PhotoData from squid.data import RandomCropPhoto2PhotoData from squid.model import SuperviseModel import torch import torch.nn as nn from squid.loss import VGGLoss from squid.net import AOD_Deep1_Net target_net = AOD_Deep1_Net() target_net = nn.DataParallel(target_net).cuda() model = SuperviseModel({ 'net': target_net, 'optimizer': torch.optim.Adam([{ 'name': 'net_params', 'params': target_net.parameters(), 'base_lr': 1e-4 }], betas=(0.9, 0.999), weight_decay=0.0005), 'lr_step_ratio': 0.5, 'lr_step_size':
def run_check_net(train_dl, val_dl, multi_gpu=[0, 1]): set_logger(LOG_PATH) logging.info('\n\n') #--- if MODEL == 'RESNET34': net = AtlasResNet34(debug=False).cuda(device=device) elif MODEL == 'RESNET18': net = AtlasResNet18(debug=False).cuda(device=device) elif MODEL == 'INCEPTION_V3': net = AtlasInceptionV3(debug=False, num_classes=28, aux_logits=AUX_LOGITS, transform_input=False).cuda(device=device) elif MODEL == 'BNINCEPTION': net = AtlasBNInception(debug=False, num_classes=28).cuda(device=device) # for param in net.named_parameters(): # if param[0][:8] in ['decoder5']:#'decoder5', 'decoder4', 'decoder3', 'decoder2' # param[1].requires_grad = False # dummy sgd to see if it can converge ... optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, net.parameters()), lr=LearningRate, momentum=0.9, weight_decay=0.0001) #optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), lr=0.045)#LearningRate scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer, mode='max', factor=0.5, patience=4, #4 resnet34 verbose=False, threshold=0.0001, threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-08) #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size, gamma=0.9, last_epoch=-1) if warm_start: logging.info('warm_start: ' + last_checkpoint_path) net, _ = load_checkpoint(last_checkpoint_path, net) # using multi GPU if multi_gpu is not None: net = nn.DataParallel(net, device_ids=multi_gpu) diff = 0 best_val_metric = 0.0 optimizer.zero_grad() #seed = get_seed() #seed = SEED #logging.info('aug seed: '+str(seed)) #ia.imgaug.seed(seed) #np.random.seed(seed) for i in range(NUM_EPOCHS): t0 = time.time() # iterate through trainset if multi_gpu is not None: net.module.set_mode('train') else: net.set_mode('train') train_loss_list, train_metric_list = [], [] #for seed in [1]:#[1, SEED]:#augment raw data with a duplicate one (augmented) #seed = get_seed() #np.random.seed(seed) #ia.imgaug.seed(i//10) for input_data, truth in train_dl: #set_trace() input_data, truth = input_data.to(device=device, dtype=torch.float), \ truth.to(device=device, dtype=torch.float) logit = net(input_data) #[:, :3, :, :] if multi_gpu is not None: _train_loss = net.module.criterion(logit, truth) _train_metric = net.module.metric(logit, truth) else: _train_loss = net.criterion(logit, truth) _train_metric = net.metric(logit, truth) train_loss_list.append(_train_loss.detach()) train_metric_list.append(_train_metric) #.detach() _train_loss.backward() #_train_loss.backward() optimizer.step() optimizer.zero_grad() train_loss = np.mean(train_loss_list) train_metric = np.mean(train_metric_list) # compute valid loss & iou (for memory efficiency, use batch) net.module.set_mode('valid') with torch.no_grad(): val_loss_list, val_metric_list = [], [] #input_data_valid, truth_valid = None, None for input_data, truth in val_dl: input_data, truth = input_data.to(device=device, dtype=torch.float), \ truth.to(device=device, dtype=torch.float)#device=device,device='cpu' logit = net(input_data) if multi_gpu is not None: _val_loss = net.module.criterion(logit, truth) _val_metric = net.module.metric(logit, truth) else: _val_loss = net.criterion(logit, truth) _val_metric = net.metric(logit, truth) val_loss_list.append(_val_loss) val_metric_list.append(_val_metric) val_loss = np.mean(val_loss_list) val_metric = np.mean(val_metric_list) #if multi_gpu is not None: # val_loss = net.module.criterion(logit, truth_valid) # val_metric = net.module.metric(logit, truth_valid) #else: # val_loss = net.criterion(logit, 
truth_valid) # val_metric = net.metric(logit, truth_valid) # Adjust learning_rate scheduler.step(val_metric) # if val_metric > best_val_metric: best_val_metric = val_metric is_best = True diff = 0 else: is_best = False diff += 1 if diff > early_stopping_round: logging.info( 'Early Stopping: val_iou does not increase %d rounds' % early_stopping_round) #print('Early Stopping: val_iou does not increase %d rounds'%early_stopping_round) break #save checkpoint checkpoint_dict = \ { 'epoch': i, 'state_dict': net.module.state_dict() if multi_gpu is not None else net.state_dict(), 'optim_dict' : optimizer.state_dict(), 'metrics': {'train_loss': train_loss, 'val_loss': val_loss, 'train_iou': train_metric, 'val_iou': val_metric} # 'metrics': {'train_loss1': train_loss1, # 'val_loss1': val_loss1, # 'train_iou1': train_iou1, # 'val_iou1': val_iou1} } save_checkpoint(checkpoint_dict, is_best=is_best, checkpoint=checkpoint_path) #if i%20==0: if i > -1: #logging.info('[EPOCH %05d][mask coverage zero] train_loss, train_iou: %0.5f, %0.5f; val_loss, val_iou: %0.5f, %0.5f'%(i, train_loss0.item(), train_iou0.item(), val_loss0.item(), val_iou0.item())) logging.info( '[EPOCH %05d][all classes] train_loss, train_metric: %0.5f, %0.5f; val_loss, val_metric: %0.5f, %0.5f; time elapsed: %0.1f min' % (i, train_loss.item(), train_metric.item(), val_loss.item(), val_metric.item(), (time.time() - t0) / 60)) #logging.info('[EPOCH %05d] train_loss, train_iou: %0.5f,%0.5f; val_loss, val_iou: %0.5f,%0.5f'%(i, train_loss.item(), train_iou.item(), val_loss.item(), val_iou.item())) i = i + 1
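# --- Illustrative sketch (editor's addition, not part of the original snippet) ---
# run_check_net() above averages lists of detached CUDA tensors with np.mean(...),
# which can fail or force device syncs depending on the PyTorch/NumPy versions.
# A hedged, self-contained helper that accumulates plain Python floats instead;
# the function name is hypothetical.
import numpy as np
import torch

def mean_of_losses(losses):
    # convert tensors (possibly on GPU) to floats before averaging
    return float(np.mean([l.item() if torch.is_tensor(l) else float(l)
                          for l in losses]))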
def train(args): """Run training loop with the given args. The function consists of the following steps: 1. Load model: gets the model from a checkpoint or from models/models.py. 2. Load optimizer and learning rate scheduler. 3. Get data loaders and class weights. 4. Get loss functions: cross entropy loss and weighted loss functions. 5. Get logger, evaluator, and saver. 6. Run training loop, evaluate and save model periodically. """ model_args = args.model_args logger_args = args.logger_args optim_args = args.optim_args data_args = args.data_args transform_args = args.transform_args task_sequence = TASK_SEQUENCES[data_args.task_sequence] print('gpus: ', args.gpu_ids) # Get model if model_args.ckpt_path: model_args.pretrained = False model, ckpt_info = ModelSaver.load_model(model_args.ckpt_path, args.gpu_ids, model_args, data_args) if not logger_args.restart_epoch_count: args.start_epoch = ckpt_info['epoch'] + 1 else: model_fn = models.__dict__[model_args.model] model = model_fn(task_sequence, model_args) num_covars = len(model_args.covar_list.split(';')) model.transform_model_shape(len(task_sequence), num_covars) if model_args.hierarchy: model = models.HierarchyWrapper(model, task_sequence) model = nn.DataParallel(model, args.gpu_ids) model = model.to(args.device) model.train() # Get optimizer and scheduler optimizer = util.get_optimizer(model.parameters(), optim_args) lr_scheduler = util.get_scheduler(optimizer, optim_args) # The optimizer is loaded from the ckpt if one exists and the new model # architecture is the same as the old one (classifier is not transformed). if model_args.ckpt_path and not model_args.transform_classifier: ModelSaver.load_optimizer(model_args.ckpt_path, args.gpu_ids, optimizer, lr_scheduler) # Get loaders and class weights train_csv_name = 'train' if data_args.uncertain_map_path is not None: train_csv_name = data_args.uncertain_map_path # Put all CXR training fractions into one dictionary and pass it to the loader cxr_frac = {'pocus': data_args.pocus_train_frac, 'hocus': data_args.hocus_train_frac, 'pulm': data_args.pulm_train_frac} train_loader = get_loader(data_args, transform_args, train_csv_name, task_sequence, data_args.su_train_frac, data_args.nih_train_frac, cxr_frac, data_args.tcga_train_frac, args.batch_size, frontal_lateral=model_args.frontal_lateral, is_training=True, shuffle=True, covar_list=model_args.covar_list, fold_num=data_args.fold_num) eval_loaders = get_eval_loaders(data_args, transform_args, task_sequence, args.batch_size, frontal_lateral=model_args.frontal_lateral, covar_list=model_args.covar_list, fold_num=data_args.fold_num) class_weights = train_loader.dataset.class_weights # Get loss functions uw_loss_fn = get_loss_fn(args.loss_fn, args.device, model_args.model_uncertainty, args.has_tasks_missing, class_weights=class_weights) w_loss_fn = get_loss_fn('weighted_loss', args.device, model_args.model_uncertainty, args.has_tasks_missing, class_weights=class_weights) # Get logger, evaluator and saver logger = TrainLogger(logger_args, args.start_epoch, args.num_epochs, args.batch_size, len(train_loader.dataset), args.device, normalization=transform_args.normalization) eval_args = {} eval_args['num_visuals'] = logger_args.num_visuals eval_args['iters_per_eval'] = logger_args.iters_per_eval eval_args['has_missing_tasks'] = args.has_tasks_missing eval_args['model_uncertainty'] = model_args.model_uncertainty eval_args['class_weights'] = class_weights eval_args['max_eval'] = logger_args.max_eval eval_args['device'] = args.device 
eval_args['optimizer'] = optimizer evaluator = get_evaluator('classification', eval_loaders, logger, eval_args) print("Eval Loaders: %d" % len(eval_loaders)) saver = ModelSaver(**vars(logger_args)) metrics = None lr_step = 0 # Train model while not logger.is_finished_training(): logger.start_epoch() for inputs, targets, info_dict, covars in train_loader: logger.start_iter() # Evaluate and save periodically metrics, curves = evaluator.evaluate(model, args.device, logger.global_step) logger.plot_metrics(metrics) metric_val = metrics.get(logger_args.metric_name, None) assert logger.global_step % logger_args.iters_per_eval != 0 or metric_val is not None saver.save(logger.global_step, logger.epoch, model, optimizer, lr_scheduler, args.device, metric_val=metric_val, covar_list=model_args.covar_list) lr_step = util.step_scheduler(lr_scheduler, metrics, lr_step, best_ckpt_metric=logger_args.metric_name) # Input: [batch_size, channels, width, height] with torch.set_grad_enabled(True): # with torch.autograd.set_detect_anomaly(True): logits = model.forward([inputs.to(args.device), covars]) # Scale up TB so that it's loss is counted for more if upweight_tb is True. if model_args.upweight_tb is True: tb_targets = targets.narrow(1, 0, 1) findings_targets = targets.narrow(1, 1, targets.shape[1] - 1) tb_targets = tb_targets.repeat(1, targets.shape[1] - 1) new_targets = torch.cat((tb_targets, findings_targets), 1) tb_logits = logits.narrow(1, 0, 1) findings_logits = logits.narrow(1, 1, logits.shape[1] - 1) tb_logits = tb_logits.repeat(1, logits.shape[1] - 1) new_logits = torch.cat((tb_logits, findings_logits), 1) else: new_logits = logits new_targets = targets unweighted_loss = uw_loss_fn(new_logits, new_targets.to(args.device)) weighted_loss = w_loss_fn(logits, targets.to(args.device)) if w_loss_fn else None logger.log_iter(inputs, logits, targets, unweighted_loss, weighted_loss, optimizer) optimizer.zero_grad() if args.loss_fn == 'weighted_loss': weighted_loss.backward() else: unweighted_loss.backward() optimizer.step() logger.end_iter() logger.end_epoch(metrics, optimizer)
def objective(SCI_RELU, SCI_BIAS, SCI_loss_type, SCI_optimizer, SCI_LR, SCI_MM, SCI_REGULARIZATION, SCI_EPOCHS, SCI_BATCH_SIZE, SCI_DROPOUT, SCI_L_SECOND, SCI_BN_MOMENTUM, SCI_SGD_MOMENTUM, SCI_BN_EPS, SCI_BN_STATS, SCI_LAST_LAYER, SCI_ACT_LAYER): global count, PercentVector, PercentVec, device, MaxCredit SCI_BATCH_SIZE = int(SCI_BATCH_SIZE) SCI_LAST_LAYER = int(SCI_LAST_LAYER) SCI_ACT_LAYER =int(SCI_ACT_LAYER) SCI_MM = round(SCI_MM,3) # real with three decimals between (0.001, 0.999) SCI_LR = round(SCI_LR,5) # real with five decimals between(1e-4, 7e-1) SCI_DROPOUT = round(SCI_DROPOUT,2) # real with two decimals between (0, 0.4) SCI_L_SECOND = int(SCI_L_SECOND) # integer between 2 and 64 SCI_EPOCHS = int(SCI_EPOCHS) # integer between (100, 500) SCI_BN_MOMENTUM = round(SCI_BN_MOMENTUM,2) # real with two decimals between (0, 0.99) SCI_SGD_MOMENTUM = round(SCI_SGD_MOMENTUM,2) # real with two decimals between (0, 0.99) SCI_loss_type = int(SCI_loss_type) # integer between 1 and 3 ('CrossEntropyLoss', 'MultiMarginLoss','NLLLoss') SCI_BN_EPS = int(SCI_BN_EPS) if int(SCI_RELU) == 1 : # integer between 1 and 2 ('True', 'False') SCI_RELU = True else: SCI_RELU = False if int(SCI_BIAS) == 1 : # integer between 1 and 2 ('True', 'False') SCI_BIAS = True else: SCI_BIAS = False SCI_REGULARIZATION = float(str(SCI_REGULARIZATION)) if SCI_BN_EPS == 0: BN_EPS = 5e-6 if SCI_BN_EPS == 1: BN_EPS = 1e-5 if SCI_BN_EPS == 2: BN_EPS = 5e-6 if SCI_BN_EPS == 3: BN_EPS = 1e-6 if SCI_BN_EPS == 4: BN_EPS = 5e-7 if SCI_BN_EPS == 5: BN_EPS = 1e-7 if SCI_BN_EPS == 6: BN_EPS = 5e-8 if SCI_BN_EPS == 7: BN_EPS = 1e-8 if SCI_BN_EPS == 8: BN_EPS = 3e-7 if SCI_BN_EPS == 9: BN_EPS = 8e-7 if SCI_BN_EPS == 10: BN_EPS = 1e-4 if SCI_BN_EPS == 11: BN_EPS = 5e-4 if SCI_BN_EPS == 12: BN_EPS = 8e-6 if SCI_BN_EPS == 13: BN_EPS = 1e-6 if SCI_BN_EPS == 14: BN_EPS = 8e-5 print('BN Batch EPS: ', BN_EPS) SCI_BN_STATS = int(SCI_BN_STATS) if SCI_BN_STATS == 0: BN_STATS = True if SCI_BN_STATS == 1: BN_STATS = False print('BN Batch STATS: ', BN_STATS) cnn = CNN6(L_FIRST, SCI_L_SECOND, KERNEL_X, SCI_BIAS, SCI_BN_MOMENTUM, SCI_RELU, SCI_DROPOUT, dataset.CLASSES, BN_EPS, BN_STATS, SCI_LAST_LAYER, SCI_ACT_LAYER) optimizer = Utillities.optimization_algorithms(SCI_optimizer,cnn, SCI_LR, SCI_SGD_MOMENTUM, SCI_REGULARIZATION) if GPU_SELECT == 2: if torch.cuda.device_count() > 1: cnn = nn.DataParallel(cnn,device_ids=[0, 1], dim = 0) cnn = cnn.cuda() if GPU_SELECT == 1: cnn.to(device) if GPU_SELECT == 0: cnn.to(device) cnn.apply(CNN6.weights_init2) #cnn.apply(CNN6.weights_reset) cnn.share_memory() loss_func = nn.CrossEntropyLoss() def create_loss(LOSS): print('*** LOSS ******:', LOSS) if LOSS == 1: loss_func = nn.BCELoss() print('********* BCELoss') if LOSS == 2: loss_func = nn.MultiMarginLoss() print('********* MMLoss') if LOSS == 4: loss_func = nn.CrossEntropyLoss() print('********* CrossEntropyLoss ') if LOSS == 3: loss_func = nn.TripletMarginLoss() print('********* TripletMarginLoss ') return loss_func MM = float(str(SCI_MM)) LR = float(str(SCI_LR)) train_losses = [] # to track the training loss as the model trains output = 0 loss = 0 accuracy = 0 early_stopping.counter = 0 early_stopping.best_score = None early_stopping.early_stop = False early_stopping.verbose = False TEST_RESULTS = torch.zeros(1,2) loss_type = create_loss(SCI_loss_type) #cnn, optimizer = amp.initialize( # cnn, optimizer, opt_level=BITS, # keep_batchnorm_fp32=True, loss_scale="dynamic" #) Utillities.listing(optimizer, SCI_SGD_MOMENTUM, SCI_BN_MOMENTUM, SCI_L_SECOND, SCI_LR, SCI_RELU, 
                        SCI_BIAS, SCI_loss_type, SCI_REGULARIZATION,
                        SCI_BATCH_SIZE, SCI_DROPOUT, SCI_LAST_LAYER,
                        SCI_ACT_LAYER)

    # Data Loader for easy mini-batch return in training
    SCI_BATCH_SIZE = int(SCI_BATCH_SIZE)
    train_loader = Data.DataLoader(dataset=dataset.train_dataset,
                                   batch_size=SCI_BATCH_SIZE,
                                   shuffle=True,
                                   num_workers=0,
                                   drop_last=True,
                                   pin_memory=True)
    validation_loader = Data.DataLoader(dataset=dataset.validation_dataset,
                                        batch_size=30,
                                        shuffle=True,
                                        num_workers=0,
                                        drop_last=True,
                                        pin_memory=True)
    test_loader = Data.DataLoader(dataset=dataset.test_dataset,
                                  batch_size=300,
                                  shuffle=True,
                                  num_workers=0,
                                  drop_last=True,
                                  pin_memory=True)

    flag = True
    for epoch in range(SCI_EPOCHS):
        loss = None
        cnn.train().cuda()
        for step, (train_data, train_target) in enumerate(train_loader):
            train_data, train_target = train_data.to(device), train_target.to(device)
            optimizer.zero_grad()  # clear gradients before the backward pass, not between backward() and step()
            output = cnn(train_data)  # forward pass: compute predicted outputs by passing inputs to the model
            loss = loss_func(output, train_target)
            train_losses.append(loss.item())
            #batch_loss.backward()
            loss.backward()  # backward pass: compute gradient of the loss with respect to model parameters
            optimizer.step()  # perform a single optimization step (parameter update)

        cnn.eval().cuda()  # switch to evaluation (no change) mode
        valid_loss = 0
        accuracy = 0
        running_loss = 0.0
        with torch.no_grad():
            for step, (validation_data, validation_target) in enumerate(validation_loader):
                validation_data, validation_target = validation_data.to(device), validation_target.to(device)
                output = cnn(validation_data)
                valid_loss = loss_func(output, validation_target)
                running_loss += valid_loss.item()
        epoch_val_loss = running_loss / len(validation_loader)

        if epoch % 3 == 0:
            SCI_LR, flag = Utillities.variable_learning_rate(SCI_LR, LR_MIN, LR_MAX, 2, flag)
            SCI_DROPOUT = SCI_DROPOUT / 1.02
        early_stopping(epoch_val_loss, cnn)
        #print('validation loss:',epoch_val_loss)
        train_losses = []
        if early_stopping.early_stop:
            if os.path.exists('checkpoint.pt'):
                #cnn = TheModelClass(*args, **kwargs)
                print("Loaded the model with the lowest Validation Loss!")
                cnn.load_state_dict(torch.load('checkpoint.pt'))
                # Choose whatever GPU device number you want
                cnn.to(device)
            break
        running_loss = 0.0

    cnn.eval()
    class_correct = list(0. for i in range(1000))
    class_total = list(0.
for i in range(1000)) with torch.no_grad(): for (test_data, test_target) in test_loader: test_data, test_target = test_data.to(device), test_target.to(device) outputs = cnn(test_data) _, predicted = torch.max(outputs, 1) c = (predicted == test_target).squeeze() dx = ((c.cpu()).numpy()).astype(int) #dx = 600 for i in range(test_target.size(0)): label = test_target[i] class_correct[label] += c[i].item() class_total[label] += 1 for i in range(dataset.CLASSES): TEST_RESULTS[0,i] = class_correct[i] / dataset.TESTED_ELEMENTS[i] print('Class: ',i,' accuracy: ', TEST_RESULTS[0,i]) print('Class: ',i,' correct: ', class_correct[i],' of ',dataset.TESTED_ELEMENTS[i]) #mp.matshow(dx.reshape((20, 30))) #mp.ylabel('Correct Results') #mp.colorbar() #mp.show() percent = (TEST_RESULTS[0,0]+TEST_RESULTS[0,1])/2 print('Final percentage: ',percent) CreditCost = 0 CreditCost = int((1 - TEST_RESULTS[0,0]) * dataset.TESTED_ELEMENTS[0] + (1 - TEST_RESULTS[0,1]) * dataset.TESTED_ELEMENTS[1] * 5) #if TEST_RESULTS[0,0] < 0.05 or TEST_RESULTS[0,1] < 0.05 : # CreditCost = CreditCost + 300 print('Last epoch: ', epoch) if os.path.exists('checkpoint.pt'): os.remove('checkpoint.pt') #print('Credit Cost: ',CreditCost) #CreditCost = CreditCost + (SCI_SGD_MOMENTUM + SCI_DROPOUT + SCI_BATCH_SIZE + SCI_L_SECOND + SCI_optimizer + SCI_loss_type+ SCI_LR+ SCI_BN_EPS+SCI_BN_STATS+SCI_LAST_LAYER+SCI_ACT_LAYER)/10000 print('Credit Cost: ',CreditCost) if -CreditCost > MaxCredit : MaxCredit = -CreditCost print('Best Score So Far: ',MaxCredit) print() print() #CreditVector[count] = MaxCredit #CreditVec[count] = count # plot the data #fig = mp.figure() #ax = fig.add_subplot(1, 1, 1) #ax.plot(CreditVec, -CreditVector, color='tab:orange') #print(CreditVec, -CreditVector) #count = count + 1 # display the plot #mp.show() # return function (with unknown internals) we wish to maximize. return -CreditCost
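# --- Illustrative sketch (editor's addition, not part of the original snippet) ---
# The objective() above maps the integer SCI_BN_EPS to a BatchNorm eps through
# fifteen separate if-statements. An equivalent lookup table keeps the mapping
# in one place (same indices, same values); this is only a refactoring sketch,
# not a change to the search space, and bn_eps_from_index is a hypothetical name.
BN_EPS_TABLE = [5e-6, 1e-5, 5e-6, 1e-6, 5e-7, 1e-7, 5e-8, 1e-8,
                3e-7, 8e-7, 1e-4, 5e-4, 8e-6, 1e-6, 8e-5]

def bn_eps_from_index(sci_bn_eps):
    # falls back to the first entry for out-of-range indices
    idx = int(sci_bn_eps)
    return BN_EPS_TABLE[idx] if 0 <= idx < len(BN_EPS_TABLE) else BN_EPS_TABLE[0]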
LR = 1e-3 LR_DECAY = 0.95 DECAY_EVERY_N_EPOCHS = 20 model = Tiramisu.FCDensenet46(num_classes=2, dense_bn_size=None, dense_compression=1.0).cuda() criterion = nn.NLLLoss().cuda() optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=1e-4) para_num = sum([p.data.nelement() for p in model.parameters()]) print('Number of params: {}'.format(para_num)) print("Let's use", torch.cuda.device_count(), "GPUs!") if torch.cuda.device_count() > 1: model = nn.DataParallel(model) # In[3]: CHECKPOINT_PATH = '/home/shenxk/Documents/nodule_seg3d/results/checkpoint' #experiment_name = 'DenseU_2d_assignCenter_noWeight_fromScratch' experiment = train_utils.Experiment(model, criterion=criterion, optimizer=optimizer, checkpoint_path=CHECKPOINT_PATH, experiment_name=experiment_name) # In[6]: joint_transformer, dataset, dataloader = {}, {}, {}
args = parser.parse_args() G = StandardGenerator(output_size=(3, 64, 64), latent_size=args.LATENT_SIZE, num_classes=6) #G = ResGenerator(output_size=(3,64,64),num_classes=6,latent_size=args.LATENT_SIZE, \ # kernel_size=3,activation=nn.LeakyReLU(),conv_groups=1,attention=False,dropout_ratio=0) D = StandardProjectionDiscriminator(input_size=(3, 64, 64), apply_sigmoid=False, num_classes=6) #D = ResProjectionDiscriminator(input_size=(3,64,64),num_classes=6,kernel_size=3,activation=nn.LeakyReLU(), \ # attention=True,apply_sigmoid=False,conv_groups=1,dropout_ratio=0) if cuda.is_available(): G = nn.DataParallel(G.cuda()) D = nn.DataParallel(D.cuda()) g_optim = Adam(G.parameters(), lr=0.0002, betas=(0.5, 0.999)) d_optim = SGD(D.parameters(), lr=0.01, momentum=0.9) #d_optim = Adam(D.parameters(),lr=0.0002,betas=(0.5,0.999)) book_data = datasets.ImageFolder(root='data/Task2_split/Task2_Split/train', transform=transforms.Compose([ transforms.Resize((64, 64)), transforms.ToTensor() ])) book_dataset = torch.utils.data.DataLoader(book_data, batch_size=args.BATCH_SIZE, shuffle=True)
# Detect if we have a GPU available device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") print('Current device: ' + str(device)) # Initialize the model for this run model_ft, input_size = initialize_model(num_classes, feature_extract, use_pretrained=True) # Send the model to GPU model_ft = model_ft.to(device) if torch.cuda.device_count() > 1: print("Using " + str(torch.cuda.device_count()) + " GPUs...") model_ft = nn.DataParallel(model_ft) # Gather the parameters to be optimized/updated in this run. params_to_update = model_ft.parameters() if feature_extract: params_to_update = [] for name,param in model_ft.named_parameters(): if param.requires_grad: params_to_update.append(param) total_params = sum(p.numel() for p in model_ft.parameters()) total_trainable_params = sum( p.numel() for p in model_ft.parameters() if p.requires_grad) print('Total parameters: ' + str(total_params) + ' Training parameters: ' + str(total_trainable_params) + '\n')
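# --- Illustrative sketch (editor's addition, not part of the original snippet) ---
# params_to_update collected above is what the optimizer should receive when
# feature_extract is True (only the newly added head has requires_grad=True).
# A hedged sketch of the usual next step; the loss choice and SGD hyperparameters
# are assumptions, not values taken from this script.
import torch.nn as nn
import torch.optim as optim

optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()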
def inference(img, hmtnet_model_file='../main/model/hmt-net-fer.pth'): """ inference with pre-trained HMT-Net :param img: an image filepath or image numpy array :param hmtnet_model_file: :return: """ hmtnet = HMTNet() device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") if torch.cuda.device_count() > 1: print("Let's use", torch.cuda.device_count(), "GPUs!") hmtnet = nn.DataParallel(hmtnet) hmtnet.load_state_dict(torch.load(hmtnet_model_file)) hmtnet.eval() if type(img) is str: image = resize(io.imread(img), (224, 224), mode='constant') else: img = cv2.resize(img, (224, 224)) image = img.astype(np.float64) image[:, :, 0] -= np.mean(image[:, :, 0]) image[:, :, 1] -= np.mean(image[:, :, 1]) image[:, :, 2] -= np.mean(image[:, :, 2]) image = np.transpose(image, [2, 0, 1]) input = torch.from_numpy(image).unsqueeze(0).float() hmtnet = hmtnet.to(device) input = input.to(device) tik = time.time() e_pred, a_pred, r_pred, g_pred = hmtnet.forward(input) tok = time.time() _, e_predicted = torch.max(e_pred.data, 1) _, a_predicted = torch.max(a_pred.data, 1) _, r_predicted = torch.max(r_pred.data, 1) _, g_predicted = torch.max(g_pred.data, 1) if int(g_predicted.to("cpu")) == 0: g_pred = 'male' elif int(g_predicted.to("cpu")) == 1: g_pred = 'female' elif int(g_predicted.to("cpu")) == 2: g_pred = 'unsure' if int(r_predicted.to("cpu")) == 0: r_pred = 'Caucasian' elif int(r_predicted.to("cpu")) == 1: r_pred = 'African-American' elif int(r_predicted.to("cpu")) == 2: r_pred = 'Asian' if int(a_predicted.to("cpu")) == 0: a_pred = '0-3' elif int(a_predicted.to("cpu")) == 1: a_pred = '4-19' elif int(a_predicted.to("cpu")) == 2: a_pred = '20-39' elif int(a_predicted.to("cpu")) == 3: a_pred = '40-69' elif int(a_predicted.to("cpu")) == 4: a_pred = '70+' if int(e_predicted.to("cpu")) == 0: e_pred = 'Surprise' elif int(e_predicted.to("cpu")) == 1: e_pred = 'Fear' elif int(e_predicted.to("cpu")) == 2: e_pred = 'Disgust' elif int(e_predicted.to("cpu")) == 3: e_pred = 'Happiness' elif int(e_predicted.to("cpu")) == 4: e_pred = 'Sadness' elif int(e_predicted.to("cpu")) == 5: e_pred = 'Anger' elif int(e_predicted.to("cpu")) == 6: e_pred = 'Neutral' # coord = c_pred.data.to("cpu").view(-1).tolist() # landmarks = [[coord[i], coord[i + 5]] for i in range(5)] return {'gender': g_pred, 'emotion': e_pred, 'race': r_pred, 'age': a_pred, 'elapse': tok - tik}
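# --- Illustrative sketch (editor's addition, not part of the original snippet) ---
# The if/elif ladders in inference() translate predicted indices into label
# strings. A hedged refactoring sketch using lookup tables with exactly the
# labels that appear above; behaviour is unchanged and decode_predictions is a
# hypothetical helper name.
GENDER_LABELS = ['male', 'female', 'unsure']
RACE_LABELS = ['Caucasian', 'African-American', 'Asian']
AGE_LABELS = ['0-3', '4-19', '20-39', '40-69', '70+']
EMOTION_LABELS = ['Surprise', 'Fear', 'Disgust', 'Happiness', 'Sadness', 'Anger', 'Neutral']

def decode_predictions(g_predicted, r_predicted, a_predicted, e_predicted):
    # each *_predicted is the argmax index moved to CPU, as in inference()
    return {'gender': GENDER_LABELS[int(g_predicted)],
            'race': RACE_LABELS[int(r_predicted)],
            'age': AGE_LABELS[int(a_predicted)],
            'emotion': EMOTION_LABELS[int(e_predicted)]}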
def main(): setup_default_logging() args, args_text = _parse_args() args.prefetcher = not args.no_prefetcher args.distributed = False if 'WORLD_SIZE' in os.environ: args.distributed = int(os.environ['WORLD_SIZE']) > 1 if args.distributed and args.num_gpu > 1: _logger.warning( 'Using more than one GPU per process in distributed mode is not allowed.Setting num_gpu to 1.') args.num_gpu = 1 args.device = 'cuda:0' args.world_size = 1 args.rank = 0 # global rank if args.distributed: args.num_gpu = 1 args.device = 'cuda:%d' % args.local_rank torch.cuda.set_device(args.local_rank) torch.distributed.init_process_group(backend='nccl', init_method='env://') args.world_size = torch.distributed.get_world_size() args.rank = torch.distributed.get_rank() assert args.rank >= 0 if args.distributed: _logger.info('Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.' % (args.rank, args.world_size)) else: _logger.info('Training with a single process on %d GPUs.' % args.num_gpu) torch.manual_seed(args.seed + args.rank) model = create_model( args.model, pretrained=args.pretrained, num_classes=args.num_classes, drop_rate=args.drop, drop_connect_rate=args.drop_connect, # DEPRECATED, use drop_path drop_path_rate=args.drop_path, drop_block_rate=args.drop_block, global_pool=args.gp, bn_tf=args.bn_tf, bn_momentum=args.bn_momentum, bn_eps=args.bn_eps, checkpoint_path=args.initial_checkpoint) if args.local_rank == 0: _logger.info('Model %s created, param count: %d' % (args.model, sum([m.numel() for m in model.parameters()]))) data_config = resolve_data_config(vars(args), model=model, verbose=args.local_rank == 0) num_aug_splits = 0 if args.aug_splits > 0: assert args.aug_splits > 1, 'A split of 1 makes no sense' num_aug_splits = args.aug_splits if args.split_bn: assert num_aug_splits > 1 or args.resplit model = convert_splitbn_model(model, max(num_aug_splits, 2)) use_amp = None if args.amp: # for backwards compat, `--amp` arg tries apex before native amp if has_apex: args.apex_amp = True elif has_native_amp: args.native_amp = True if args.apex_amp and has_apex: use_amp = 'apex' elif args.native_amp and has_native_amp: use_amp = 'native' elif args.apex_amp or args.native_amp: _logger.warning("Neither APEX or native Torch AMP is available, using float32. " "Install NVIDA apex or upgrade to PyTorch 1.6") if args.num_gpu > 1: if use_amp == 'apex': _logger.warning( 'Apex AMP does not work well with nn.DataParallel, disabling. Use DDP or Torch AMP.') use_amp = None model = nn.DataParallel(model, device_ids=list(range(args.num_gpu))).cuda() assert not args.channels_last, "Channels last not supported with DP, use DDP." else: model.cuda() if args.channels_last: model = model.to(memory_format=torch.channels_last) optimizer = create_optimizer(args, model) amp_autocast = suppress # do nothing loss_scaler = None if use_amp == 'apex': model, optimizer = amp.initialize(model, optimizer, opt_level='O1') loss_scaler = ApexScaler() if args.local_rank == 0: _logger.info('Using NVIDIA APEX AMP. Training in mixed precision.') elif use_amp == 'native': amp_autocast = torch.cuda.amp.autocast loss_scaler = NativeScaler() if args.local_rank == 0: _logger.info('Using native Torch AMP. Training in mixed precision.') else: if args.local_rank == 0: _logger.info('AMP not enabled. 
Training in float32.') # optionally resume from a checkpoint resume_epoch = None if args.resume: resume_epoch = resume_checkpoint( model, args.resume, optimizer=None if args.no_resume_opt else optimizer, loss_scaler=None if args.no_resume_opt else loss_scaler, log_info=args.local_rank == 0) model_ema = None if args.model_ema: # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper model_ema = ModelEma( model, decay=args.model_ema_decay, device='cpu' if args.model_ema_force_cpu else '', resume=args.resume) if args.distributed: if args.sync_bn: assert not args.split_bn try: if has_apex and use_amp != 'native': # Apex SyncBN preferred unless native amp is activated model = convert_syncbn_model(model) else: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) if args.local_rank == 0: _logger.info( 'Converted model to use Synchronized BatchNorm. WARNING: You may have issues if using ' 'zero initialized BN layers (enabled by default for ResNets) while sync-bn enabled.') except Exception as e: _logger.error('Failed to enable Synchronized BatchNorm. Install Apex or Torch >= 1.1') if has_apex and use_amp != 'native': # Apex DDP preferred unless native amp is activated if args.local_rank == 0: _logger.info("Using NVIDIA APEX DistributedDataParallel.") model = ApexDDP(model, delay_allreduce=True) else: if args.local_rank == 0: _logger.info("Using native Torch DistributedDataParallel.") model = NativeDDP(model, device_ids=[args.local_rank]) # can use device str in Torch >= 1.1 # NOTE: EMA model does not need to be wrapped by DDP lr_scheduler, num_epochs = create_scheduler(args, optimizer) start_epoch = 0 if args.start_epoch is not None: # a specified start_epoch will always override the resume epoch start_epoch = args.start_epoch elif resume_epoch is not None: start_epoch = resume_epoch if lr_scheduler is not None and start_epoch > 0: lr_scheduler.step(start_epoch) if args.local_rank == 0: _logger.info('Scheduled epochs: {}'.format(num_epochs)) train_dir = os.path.join(args.data, 'train') if not os.path.exists(train_dir): _logger.error('Training folder does not exist at: {}'.format(train_dir)) exit(1) dataset_train = Dataset(train_dir) collate_fn = None mixup_fn = None mixup_active = args.mixup > 0 or args.cutmix > 0. 
or args.cutmix_minmax is not None if mixup_active: mixup_args = dict( mixup_alpha=args.mixup, cutmix_alpha=args.cutmix, cutmix_minmax=args.cutmix_minmax, prob=args.mixup_prob, switch_prob=args.mixup_switch_prob, elementwise=args.mixup_elem, label_smoothing=args.smoothing, num_classes=args.num_classes) if args.prefetcher: assert not num_aug_splits # collate conflict (need to support deinterleaving in collate mixup) collate_fn = FastCollateMixup(**mixup_args) else: mixup_fn = Mixup(**mixup_args) if num_aug_splits > 1: dataset_train = AugMixDataset(dataset_train, num_splits=num_aug_splits) train_interpolation = args.train_interpolation if args.no_aug or not train_interpolation: train_interpolation = data_config['interpolation'] loader_train = create_loader( dataset_train, input_size=data_config['input_size'], batch_size=args.batch_size, is_training=True, use_prefetcher=args.prefetcher, no_aug=args.no_aug, re_prob=args.reprob, re_mode=args.remode, re_count=args.recount, re_split=args.resplit, scale=args.scale, ratio=args.ratio, hflip=args.hflip, vflip=args.vflip, color_jitter=args.color_jitter, auto_augment=args.aa, num_aug_splits=num_aug_splits, interpolation=train_interpolation, mean=data_config['mean'], std=data_config['std'], num_workers=args.workers, distributed=args.distributed, collate_fn=collate_fn, pin_memory=args.pin_mem, use_multi_epochs_loader=args.use_multi_epochs_loader ) eval_dir = os.path.join(args.data, 'val') if not os.path.isdir(eval_dir): eval_dir = os.path.join(args.data, 'validation') if not os.path.isdir(eval_dir): _logger.error('Validation folder does not exist at: {}'.format(eval_dir)) exit(1) dataset_eval = Dataset(eval_dir) loader_eval = create_loader( dataset_eval, input_size=data_config['input_size'], batch_size=args.validation_batch_size_multiplier * args.batch_size, is_training=False, use_prefetcher=args.prefetcher, interpolation=data_config['interpolation'], mean=data_config['mean'], std=data_config['std'], num_workers=args.workers, distributed=args.distributed, crop_pct=data_config['crop_pct'], pin_memory=args.pin_mem, ) if args.jsd: assert num_aug_splits > 1 # JSD only valid with aug splits set train_loss_fn = JsdCrossEntropy(num_splits=num_aug_splits, smoothing=args.smoothing).cuda() elif mixup_active: # smoothing is handled with mixup target transform train_loss_fn = SoftTargetCrossEntropy().cuda() elif args.smoothing: train_loss_fn = LabelSmoothingCrossEntropy(smoothing=args.smoothing).cuda() else: train_loss_fn = nn.CrossEntropyLoss().cuda() validate_loss_fn = nn.CrossEntropyLoss().cuda() eval_metric = args.eval_metric best_metric = None best_epoch = None saver = None output_dir = '' if args.local_rank == 0: output_base = args.output if args.output else './output' exp_name = '-'.join([ datetime.now().strftime("%Y%m%d-%H%M%S"), args.model, str(data_config['input_size'][-1]) ]) output_dir = get_outdir(output_base, 'train', exp_name) decreasing = True if eval_metric == 'loss' else False saver = CheckpointSaver( model=model, optimizer=optimizer, args=args, model_ema=model_ema, amp_scaler=loss_scaler, checkpoint_dir=output_dir, recovery_dir=output_dir, decreasing=decreasing) with open(os.path.join(output_dir, 'args.yaml'), 'w') as f: f.write(args_text) try: for epoch in range(start_epoch, num_epochs): if args.distributed: loader_train.sampler.set_epoch(epoch) train_metrics = train_epoch( epoch, model, loader_train, optimizer, train_loss_fn, args, lr_scheduler=lr_scheduler, saver=saver, output_dir=output_dir, amp_autocast=amp_autocast, loss_scaler=loss_scaler, 
model_ema=model_ema, mixup_fn=mixup_fn) if args.distributed and args.dist_bn in ('broadcast', 'reduce'): if args.local_rank == 0: _logger.info("Distributing BatchNorm running means and vars") distribute_bn(model, args.world_size, args.dist_bn == 'reduce') eval_metrics = validate(model, loader_eval, validate_loss_fn, args, amp_autocast=amp_autocast) if model_ema is not None and not args.model_ema_force_cpu: if args.distributed and args.dist_bn in ('broadcast', 'reduce'): distribute_bn(model_ema, args.world_size, args.dist_bn == 'reduce') ema_eval_metrics = validate( model_ema.ema, loader_eval, validate_loss_fn, args, amp_autocast=amp_autocast, log_suffix=' (EMA)') eval_metrics = ema_eval_metrics if lr_scheduler is not None: # step LR for next epoch lr_scheduler.step(epoch + 1, eval_metrics[eval_metric]) update_summary( epoch, train_metrics, eval_metrics, os.path.join(output_dir, 'summary.csv'), write_header=best_metric is None) if saver is not None: # save proper checkpoint with eval metric save_metric = eval_metrics[eval_metric] best_metric, best_epoch = saver.save_checkpoint(epoch, metric=save_metric) except KeyboardInterrupt: pass if best_metric is not None: _logger.info('*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch))
train_num, valid_num = len(train_list), len(valid_list) num_batches = int(train_num / args.batch_size) valid_num_batches = int(valid_num / args.batch_size) train_dataset = HabitatDataset(args, train_list, transform=RPFAugmentation(size=args.img_size)) valid_dataset = HabitatDataset(args, valid_list, transform=RPFAugmentation(size=args.img_size)) rpf = build_net('train', args) if torch.cuda.device_count() > 1: print("Use", torch.cuda.device_count(), "GPUs") rpf = nn.DataParallel(rpf) if args.cuda: rpf.cuda() cudnn.benchmark = True criterion = nn.CrossEntropyLoss() logsoftmax = nn.LogSoftmax() optimizer = optim.Adam(rpf.parameters(), lr=args.lr, weight_decay=args.wd) def train(): rpf.train() lr = args.lr epoch = disp_loss = 0 eval_loss = 10000.
def TrainModel( initial_stacked_model_path=None, stacked_model_output_path='SavedModels/StackedFeatureExtractorAndRpn.pth', cross_validate=True): # GET THE ANCHOR SIZES. print('Loading anchors...') if USE_PRECOMPUTED_ANCHORS: anchors = pickle.load(open(ANCHORS_FILEPATH, 'rb')) else: anchors = ComputeAnchorSizes(GROUND_TRUTH_CSV_FILEPATH) pickle.dump(anchors, open(ANCHORS_FILEPATH, 'wb')) # LOAD THE DATASET. print('Loading training data...') if USE_PRECOMPUTED_TRAINING_DATA: all_images, all_anchor_class_labels, all_anchor_class_label_loss_masks, all_anchor_regression_targets = pickle.load( open(TRAINING_DATA_FILEPATH, 'rb')) else: worker_args = [ (line, anchors) for line in open(GROUND_TRUTH_CSV_FILEPATH).readlines()[1:] ] with Pool(7) as worker_pool: worker_results = worker_pool.map( GetRpnTrainingDataForGroundTruthLine, worker_args) all_images = [] all_anchor_class_labels = [] all_anchor_class_label_loss_masks = [] all_anchor_regression_targets = [] for image, class_labels, class_loss_mask, bbox_adjustments in worker_results: all_images.append(image) all_anchor_class_labels.append(class_labels) all_anchor_class_label_loss_masks.append(class_loss_mask) all_anchor_regression_targets.append(bbox_adjustments) all_images = torch.tensor(all_images, dtype=torch.float32) all_anchor_class_labels = torch.tensor(all_anchor_class_labels, dtype=torch.long) all_anchor_class_label_loss_masks = torch.tensor( all_anchor_class_label_loss_masks, dtype=torch.float32) all_anchor_regression_targets = torch.tensor( all_anchor_regression_targets, dtype=torch.float32) pickle.dump( (all_images, all_anchor_class_labels, all_anchor_class_label_loss_masks, all_anchor_regression_targets), open(TRAINING_DATA_FILEPATH, 'wb'), protocol=4) # CREATE THE MODEL. print('Creating model...') # Thoughts thus far... # - Resnets seem to work better than VGG. # - ResNet50 with a filter count coef of 64 seems to overfit. It works better with a # filter count coef of 32. The score for 64 you see below most likely would not have # improved with additional epochs, but the score for 32 would. # - ResNet34 uses way less VRAM than ResNet50 (i.e. 7.7 GB with a batch size of 8 vs 6.4 with 2). # - ResNet34 seems to work better than ResNet50 (better loss) # - ResNet18 is inferior to 34 (at stock widths) # - ResNet34 works really well at 2x width. Dropout might be benificial # because the test regression loss was much higher than the train regression loss. # - Instance norm resulted in slower training & better stability than batch norm. # - Using a slight dropout just before regression input *might* be slightly benificial # I would need to do more than 10 epochs to be sure. # - ResNet18 with a channel coef of 256 is inferior to 24 with a coef of 128 feature_extractor = ResNet34(IMAGE_CHANNELS, filter_count_coef=128, dropout_rate=.5) # feature_extractor = ResNet( # BasicBlock, # [3,6,36,3], # image_channels = 1, # filter_count_coef = 128, # dropout_rate = .4) # rpn_network = RPN( # input_channels = feature_extractor.FinalChannelsCount, # anchor_count = len(anchors)) rpn_network = RPN_WithHidden( input_channels=feature_extractor.FinalChannelsCount, anchor_count=len(anchors), classifier_dropout_rate=.5, regression_dropout_rate=.5, classifier_hidden_units=512, #256 regressor_hidden_units=512) #256 model = StackedFeatureExtractorAndRpn(feature_extractor, rpn_network) model = model.to(DEVICE) optimizer = optim.SGD(model.parameters(), .05, momentum=.9, nesterov=True) # CONVERT THE MODEL AND OPTIMIZER TO MIXED PRECISION. 
model, optimizer = amp.initialize(model, optimizer, opt_level="O1", loss_scale="dynamic") model = nn.DataParallel(model, device_ids=[0, 1]) # LOAD PRE-TRAINED WEIGHTS. if initial_stacked_model_path is not None: print('Loading pre-trained stacked model weights.') model.load_state_dict(torch.load(initial_stacked_model_path)) # DETERMINE WHICH EXAMPLES ARE USED FOR TRAINING AND TESTING. if cross_validate: training_indices = np.array( [i for i in range(len(all_images)) if i % 4 != 0]) testing_indices = np.array( [i for i in range(len(all_images)) if i % 4 == 0]) print('Using {} images for training and {} for testing.'.format( len(training_indices), len(testing_indices))) else: training_indices = np.array(range(len(all_images))) print('Training on {} images.'.format(len(all_images))) # TRAIN THE MODEL. EPOCH_COUNT = 100 learning_rate_scheduler = optim.lr_scheduler.MultiStepLR( optimizer, milestones=[15, 50, 75, 90, 95], gamma=.5) for epoch in range(EPOCH_COUNT): print('Epoch {}/{}'.format(epoch + 1, EPOCH_COUNT)) # TRAIN THE NETWORK. epoch_batch_training_classification_losses = [] epoch_batch_training_regression_losses = [] all_training_batches = list( BatchSampler( RandomSampler(training_indices), batch_size=4, # 4 for double width resnet 34 drop_last=False)) for batch_num in range(len(all_training_batches)): # SET THE MODEL TO TRAINING MODE. model.train() # GET THE BATCH DATA. batch_indices = training_indices[all_training_batches[batch_num]] batch_images = all_images[batch_indices].to(DEVICE) batch_anchor_classes = all_anchor_class_labels[batch_indices].to( DEVICE) batch_anchor_classes_loss_masks = all_anchor_class_label_loss_masks[ batch_indices].to(DEVICE) batch_anchor_regression_targets = all_anchor_regression_targets[ batch_indices].to(DEVICE) # ZERO THE GRADIENTS. model.zero_grad() # FORWARD PASS. predicted_region_class_labels, region_regression_results = model( batch_images) # COMPUTE LOSSES. classification_loss_function = nn.CrossEntropyLoss( weight=torch.tensor([1, 15], dtype=torch.float32).to(DEVICE)) classification_loss = classification_loss_function( predicted_region_class_labels * batch_anchor_classes_loss_masks, batch_anchor_classes) element_wise_regression_loss_function = nn.SmoothL1Loss( reduction='none') element_wise_regression_loss = element_wise_regression_loss_function( region_regression_results, batch_anchor_regression_targets) element_wise_regression_loss = torch.sum( element_wise_regression_loss, dim=1, keepdim=True) element_wise_weights = batch_anchor_classes.float().view( element_wise_regression_loss.shape) regression_loss = 400 * torch.mean( element_wise_regression_loss * element_wise_weights) loss = classification_loss + regression_loss # UPDATE THE NETWORK. with amp.scale_loss(loss, optimizer) as scale_loss: # amp scale_loss.backward() optimizer.step() # SAVE THE LOSS. epoch_batch_training_classification_losses.append( classification_loss.detach().cpu().numpy()) epoch_batch_training_regression_losses.append( regression_loss.detach().cpu().numpy()) learning_rate_scheduler.step() if cross_validate: # SET THE MODEL TO EVALUATION MODE. model.eval() with torch.no_grad(): # CROSS-VALIDATE THE NETWORK. epoch_batch_testing_classification_losses = [] epoch_batch_testing_regression_losses = [] all_testing_batches = list( BatchSampler(RandomSampler(testing_indices), batch_size=8, drop_last=False)) for batch_num in range(len(all_testing_batches)): # GET THE BATCH DATA. 
batch_indices = testing_indices[ all_testing_batches[batch_num]] batch_images = all_images[batch_indices].to(DEVICE) batch_anchor_classes = all_anchor_class_labels[ batch_indices].to(DEVICE) batch_anchor_classes_loss_masks = all_anchor_class_label_loss_masks[ batch_indices].to(DEVICE) batch_anchor_regression_targets = all_anchor_regression_targets[ batch_indices].to(DEVICE) # FORWARD PASS. predicted_region_class_labels, region_regression_results = model( batch_images) # COMPUTE LOSSES. classification_loss_function = nn.CrossEntropyLoss( weight=torch.tensor([1, 1], dtype=torch.float32).to( DEVICE)) classification_loss = classification_loss_function( predicted_region_class_labels * batch_anchor_classes_loss_masks, batch_anchor_classes) element_wise_regression_loss_function = nn.SmoothL1Loss( reduction='none') element_wise_regression_loss = element_wise_regression_loss_function( region_regression_results, batch_anchor_regression_targets) element_wise_regression_loss = torch.sum( element_wise_regression_loss, dim=1, keepdim=True) element_wise_weights = batch_anchor_classes.float().view( element_wise_regression_loss.shape) regression_loss = 400 * torch.mean( element_wise_regression_loss * element_wise_weights) loss = classification_loss + regression_loss # SAVE THE LOSS. epoch_batch_testing_classification_losses.append( classification_loss.detach().cpu().numpy()) epoch_batch_testing_regression_losses.append( regression_loss.detach().cpu().numpy()) # SAVE THE TRAINED MODEL. if stacked_model_output_path is not None: torch.save(model.state_dict(), stacked_model_output_path) if cross_validate: print('\tTesting mean loss - c: {:.04f}, r: {:.04f}'.format( np.mean(epoch_batch_testing_classification_losses), np.mean(epoch_batch_testing_regression_losses))) print('\tTraining mean loss - c: {:.04f}, r: {:.04f}'.format( np.mean(epoch_batch_training_classification_losses), np.mean(epoch_batch_training_regression_losses))) # SAVE THE TRAINED MODEL. if stacked_model_output_path is not None: torch.save(model.state_dict(), stacked_model_output_path)
def factory(opt, cuda=True, data_parallel=True, define_forward=True):
    opt = copy.copy(opt)

    # forward_* will be better handled in a future release
    def forward_resnet(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        if opt.get('conv', False):
            return x
        x = self.layer4(x)
        if 'pooling' in opt and opt['pooling']:
            x = self.avgpool(x)
            div = x.size(3) + x.size(2)
            x = x.sum(3)
            x = x.sum(2)
            x = x.view(x.size(0), -1)
            x = x.div(div)
        return x

    def forward_resnext(self, x):
        x = self.features(x)
        if 'pooling' in opt and opt['pooling']:
            x = self.avgpool(x)
            div = x.size(3) + x.size(2)
            x = x.sum(3)
            x = x.sum(2)
            x = x.view(x.size(0), -1)
            x = x.div(div)
        return x

    if opt['arch'] in pytorch_resnet_names:
        model = pytorch_models.__dict__[opt['arch']](pretrained=True)
    elif opt['arch'] == 'fbresnet152':
        model = torch7_models.__dict__[opt['arch']](num_classes=1000,
                                                    pretrained='imagenet')
    elif opt['arch'] in torch7_resnet_names:
        model = torch7_models.__dict__[opt['arch']](num_classes=1000,
                                                    pretrained='imagenet')
    else:
        raise ValueError

    # The pretrained model was trained on 224x224 images; when applying it to
    # 448x448 images, set the corresponding [dilation].
    #pdb.set_trace()  # leftover debugging breakpoint, kept disabled
    set_dilation(model, opt.get('dilation', 1))

    # To use the factory to retrieve the original model
    if define_forward:
        convnet = model  # ugly hack in case of DataParallel wrapping
        model.forward = lambda x: forward_resnet(convnet, x)

    if data_parallel:
        model = nn.DataParallel(model).cuda()
        if not cuda:
            raise ValueError

    if cuda:
        model.cuda()

    return model