Code Example #1
def train_classifier(config: Config):
    config_json = config.toDictionary()
    print('train_classifier')
    print(config_json)
    from training.train import train
    from torch.utils.data.dataloader import DataLoader
    from data.loader_segmentation import Segmentation

    model = get_model(config.classifier_name)
    
    wandb.init(
        entity='kobus_wits',
        project='wass_classifier',
        name=config.sweep_id + '_c_' + config.classifier_name,
        config=config_json,
    )
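    # wandb.watch hooks the model to log gradient histograms to the active run.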
    wandb.watch(model)

    train(
        model=model,
        dataloaders={
            'train': DataLoader(
                Segmentation(
                    config.classifier_dataset_root,
                    source='train',
                    augmentation='train',
                    image_size=config.classifier_image_size
                ),
                batch_size=config.classifier_batch_size_train,
                shuffle=True,
                pin_memory=True,
                num_workers=4,
                prefetch_factor=4
            ),
        },
        epochs=config.classifier_epochs,
        validation_mod=10
    )

    wandb.finish()
Code Example #2
def main(cfg, _log):
    init_seed(cfg.seed)

    _log.info("=> fetching img pairs.")
    train_set, valid_set = get_dataset(cfg)

    _log.info('{} samples found, {} train samples and {} test samples '.format(
        len(valid_set) + len(train_set), len(train_set), len(valid_set)))

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=cfg.train.batch_size,
                                               num_workers=cfg.train.workers,
                                               pin_memory=True,
                                               shuffle=True)

    max_test_batch = 4
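    # A ConcatDataset validation set gets one loader per constituent dataset so each split is evaluated separately.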
    if isinstance(valid_set, torch.utils.data.ConcatDataset):
        valid_loader = [
            torch.utils.data.DataLoader(s,
                                        batch_size=min(max_test_batch,
                                                       cfg.train.batch_size),
                                        num_workers=min(4, cfg.train.workers),
                                        pin_memory=True,
                                        shuffle=False)
            for s in valid_set.datasets
        ]
        valid_size = sum([len(l) for l in valid_loader])
    else:
        valid_loader = torch.utils.data.DataLoader(
            valid_set,
            batch_size=min(max_test_batch, cfg.train.batch_size),
            num_workers=min(4, cfg.train.workers),
            pin_memory=True,
            shuffle=False)
        valid_size = len(valid_loader)

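    # An epoch_size/valid_size of 0 means "use the full loader"; both are then capped at the number of available batches.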
    if cfg.train.epoch_size == 0:
        cfg.train.epoch_size = len(train_loader)
    if cfg.train.valid_size == 0:
        cfg.train.valid_size = valid_size
    cfg.train.epoch_size = min(cfg.train.epoch_size, len(train_loader))
    cfg.train.valid_size = min(cfg.train.valid_size, valid_size)

    model = get_model(cfg.model)
    loss = get_loss(cfg.loss)
    trainer = get_trainer(cfg.trainer)(train_loader, valid_loader, model, loss,
                                       _log, cfg.save_root, cfg.train)

    # Freeze everything except the pyramid layers.
    for name, param in model.named_parameters():
        if "pyramid" not in name:
            param.requires_grad = False
        else:
            print(name, param.requires_grad)

    epoch, weights = load_checkpoint('checkpoints/Sintel/pwclite_ar.tar')
    print("Loaded checkpoint weights:", weights)

    trainer.model = model
    trainer.train()
Code Example #3
File: test_model.py (Project: PengchaoHan/EasyFL)
 def run_model_common(self, dataset, dataset_file_path, model_name,
                      dim_predict):
     data_train, data_test = load_data(dataset, dataset_file_path,
                                       model_name)
     model = get_model(model_name,
                       dataset,
                       rand_seed=seed,
                       step_size=step_size,
                       device=device,
                       flatten_weight=False)
     data_loader = DataLoader(data_train,
                              batch_size=1,
                              shuffle=False,
                              num_workers=0)
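     # Run a single training step as a smoke test: round-trip the weights, then one forward/backward/update before stopping.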
     for i, (images, labels) in enumerate(data_loader):
         w = model.get_weight()
         model.assign_weight(w)
         model.model.train()
         images, labels = images.to(device), labels.to(device)
         model.optimizer.zero_grad()
         output = model.model(images)
         self.assertEqual(len(output[0]), dim_predict)
         loss = model.loss_fn(output, labels)
         loss.backward()
         model.optimizer.step()
         print(loss.item())
         break
Code Example #4
def main():
    args = opts().parse()
    train_sampler, train_loader, val_loader, test_loader = get_dataloaders(
        dataset='cifar10', batch=args.batch_size, dataroot="../../data/cifar10",
        aug=args.aug, cutout=args.cutout, K=args.K
    )

    if args.gpu > 0:
        cudnn.benchmark = True

    # model = wideresnet.Wide_ResNet(40, 2, 0.3, 10).to(args.device)
    model = get_model(args.model).to(args.device)
    _, best_score = train(model, train_loader, test_loader, args)
Code Example #5
File: _basic_train.py (Project: zyl1336110861/ARFlow)
def main(cfg, _log):
    init_seed(cfg.seed)

    _log.info("=> fetching img pairs.")
    train_set, valid_set = get_dataset(cfg)

    _log.info('{} samples found, {} train samples and {} test samples '.format(
        len(valid_set) + len(train_set), len(train_set), len(valid_set)))

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=cfg.train.batch_size,
                                               num_workers=cfg.train.workers,
                                               pin_memory=True,
                                               shuffle=True)

    max_test_batch = 4
    if isinstance(valid_set, torch.utils.data.ConcatDataset):
        valid_loader = [
            torch.utils.data.DataLoader(s,
                                        batch_size=min(max_test_batch,
                                                       cfg.train.batch_size),
                                        num_workers=min(4, cfg.train.workers),
                                        pin_memory=True,
                                        shuffle=False)
            for s in valid_set.datasets
        ]
        valid_size = sum([len(l) for l in valid_loader])
    else:
        valid_loader = torch.utils.data.DataLoader(
            valid_set,
            batch_size=min(max_test_batch, cfg.train.batch_size),
            num_workers=min(4, cfg.train.workers),
            pin_memory=True,
            shuffle=False)
        valid_size = len(valid_loader)

    if cfg.train.epoch_size == 0:
        cfg.train.epoch_size = len(train_loader)
    if cfg.train.valid_size == 0:
        cfg.train.valid_size = valid_size
    cfg.train.epoch_size = min(cfg.train.epoch_size, len(train_loader))
    cfg.train.valid_size = min(cfg.train.valid_size, valid_size)

    model = get_model(cfg.model)
    loss = get_loss(cfg.loss)
    trainer = get_trainer(cfg.trainer)(train_loader, valid_loader, model, loss,
                                       _log, cfg.save_root, cfg.train)

    trainer.train()
Code Example #6
def worker(id, cfg, shared):
    # init logger
    curr_time = datetime.datetime.now().strftime("%y%m%d%H%M%S")
    _log = init_logger(log_dir=cfg.save_root, filename=curr_time[6:] + '.log')
    if id == 0:
        _log.info(id, '=> will save everything to {}'.format(cfg.save_root))

    # show configurations
    cfg_str = pprint.pformat(cfg)
    if id == 0: _log.info(id, '=> configurations \n ' + cfg_str)

    # Distributed
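    # NCCL backend for GPU training, Gloo as the CPU fallback.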
    if cfg.mp.enabled:
        if cfg.train.n_gpu > 0:
            dist.init_process_group(backend="nccl",
                                    init_method="env://",
                                    world_size=cfg.mp.workers,
                                    rank=id)
        else:
            dist.init_process_group(backend="gloo",
                                    init_method="env://",
                                    world_size=cfg.mp.workers,
                                    rank=id)

    # Get Model and Loss
    model = get_model(cfg, id)
    loss = get_loss(cfg, id)

    # Create Trainer
    trainer = get_trainer(cfg)(id, model, loss, _log, cfg.save_root, cfg,
                               shared)

    # Train or Test
    try:
        if cfg.eval:
            trainer.eval()
        else:
            trainer.train()
    except Exception as e:
        import traceback
        traceback.print_exc()

    # Destroy
    if cfg.mp.enabled:
        dist.destroy_process_group()
Code Example #7
def handle_segmentation(vargin, scan):
    tissues = vargin['tissues']

    print('')

    if len(tissues) == 0:
        raise ValueError('No tissues specified for segmentation')

    for tissue in tissues:
        segment_weights_path = vargin[SEGMENTATION_WEIGHTS_DIR_KEY][0]
        tissue.find_weights(segment_weights_path)
        # Load model
        dims = scan.get_dimensions()
        input_shape = (dims[0], dims[1], 1)
        model = get_model(vargin[SEGMENTATION_MODEL_KEY],
                          input_shape=input_shape,
                          weights_path=tissue.weights_filepath)
        model.batch_size = vargin[SEGMENTATION_BATCH_SIZE_KEY]
        scan.segment(model, tissue)
Code Example #8
File: main.py (Project: transmuteAI/RepeatNet)
 def __init__(self, args, batch_size=128):
     super().__init__()
     self.args = args
     self.save_hyperparameters()
     self.batch_size = batch_size
     self.dataset = args.dataset
     self.model = get_model(args.model_name, args.num_classes, args)
     if self.args.weights_path:
         weights = torch.load(self.args.weights_path)['state_dict']
         weights = OrderedDict([
             (k[6:], v) for k, v in weights.items()
             if ('bn' not in k and 'downsample' not in k and 'fc' not in k)
         ])  #[6:] to remove 'model.' in front of keys
         self.model.load_state_dict(weights, False)
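     # Keep BN, downsample, FC and binary-activation parameters trainable; freeze everything else.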
     if self.args.freeze_weights:
         for k, params in self.model.named_parameters():
             if 'bn' in k or 'downsample' in k or 'fc' in k or 'binary_activation' in k:
                 params.requires_grad = True
             else:
                 params.requires_grad = False
Code Example #9
def main(args):
    # print learning settings.
    print('GPU: {}'.format(args.gpu))
    print('# Mini-batch size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('Using {} dataset.'.format(args.dataset))
    print('')
    # Load datasets.
    trainset, testset = get_dataset(args.dataset)
    # Data transformer
    transformer = Transformer(trainset,
                              pca=False,
                              normalize=args.normalize,
                              trans=args.augment)
    # Make transform datasets.
    trainset = D.TransformDataset(trainset, transformer.train)
    testset = D.TransformDataset(testset, transformer.test)
    # Setup dataset iterators.
    train_iter = BCIterator(trainset, args.batchsize, classes=args.classes)
    test_iter = chainer.iterators.SerialIterator(testset, args.batchsize,
                                                 False, False)
    # Set CNN model.
    model = MultiplexClassifier(get_model(args.model, args.classes),
                                lossfun=kl_divergence,
                                accfun=accuracy_mix)
    # Setup GPU
    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    # Run to get model information.
    one_predict(model, train_iter, args.gpu)
    print(str_info(model))
    # setup trainer
    trainer = setup_trainer(args, train_iter, test_iter, model)
    # Run to get model information.
    one_predict(model, train_iter, args.gpu)
    print(str_info(model))
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)
    # run
    trainer.run()
Code Example #10
def train_affinitynet(config: Config):
    config_json = config.toDictionary()
    print('train_affinitynet')
    print(config_json)
    from training.train import train
    from torch.utils.data.dataloader import DataLoader
    from data.loader_segmentation import Segmentation
    from artifacts.artifact_manager import artifact_manager

    model = get_model(config.affinity_net_name)
    
    wandb.init(
        entity='kobus_wits',
        project='wass_affinity',
        name=config.sweep_id + '_a_' + config.affinity_net_name,
        config=config_json,
    )
    wandb.watch(model)

    train(
        model=model,
        dataloaders={
            'train': DataLoader(
                Segmentation(
                    config.classifier_dataset_root,
                    source='train',
                    augmentation='train',
                    image_size=config.affinity_net_image_size,
                    requested_labels=['affinity'],
                    affinity_root=artifact_manager.getDir()
                ),
                batch_size=config.affinity_net_batch_size,
                shuffle=False,
                pin_memory=False,
                num_workers=4,
                prefetch_factor=4
            ),
        },
        epochs=config.affinity_net_epochs,
        validation_mod=10
    )

    wandb.finish()
Code Example #11
    def __init__(self, settings: dict, settings_to_log: list):
        self.settings = settings
        self.settings_to_log = settings_to_log

        self.threshold = self.settings['threshold']
        self.start_epoch = self.settings['start_epoch']
        self.dataset = self.settings['dataset']
        self.batch_size = self.settings['batch_size']
        self.workers = self.settings['workers']
        self.cuda = self.settings['cuda']
        self.fp16 = self.settings['fp16']
        self.epochs = self.settings['epochs']
        self.ignore_index = self.settings['ignore_index']
        self.loss_reduction = self.settings['loss_reduction']

        # -------------------- Define Data loader ------------------------------
        self.loaders, self.nclass, self.plotter = make_data_loader(settings)
        self.train_loader, self.val_loader, self.test_loader = [self.loaders[key] for key in ['train', 'val', 'test']]

        # -------------------- Define model ------------------------------------
        self.model = get_model(self.settings)

        # -------------------- Define optimizer and its options ----------------
        self.optimizer = define_optimizer(self.model, self.settings['optimizer'], self.settings['optimizer_params'])
        if self.settings['lr_scheduler']:
            self.lr_scheduler = LRScheduler(self.settings['lr_scheduler'], self.optimizer, self.batch_size)

        # -------------------- Define loss -------------------------------------
        input_size = (self.batch_size, self.nclass, *self.settings['target_size'])
        self.criterion = CustomLoss(input_size=input_size, ignore_index=self.ignore_index, reduction=self.loss_reduction)

        self.evaluator = Evaluator(metrics=self.settings['metrics'], num_class=self.nclass, threshold=self.settings['threshold'])

        self.logger = MainLogger(loggers=self.settings['loggers'], settings=settings, settings_to_log=settings_to_log)
        if self.settings['resume']:
            self.resume_checkpoint(self.settings['resume'])

        self.metric_to_watch = 0.0
Code Example #12
File: train.py (Project: roger60229/groundnet)
  # Snippet starts mid-file; the opening of this config OrderedDict was
  # truncated, so the line below is a reconstruction (earlier entries omitted).
  config = OrderedDict([
      ('finetune', args.finetune),
      ('use_outer', args.use_outer),
      ('box_usage', args.box_usage),
      ('feat_box', feat_box),
      ('loss', args.loss),
      ('optim', args.optim),
      ('lr', args.lr),
      ('lr_min', args.lr_min),
      ('lr_decay', args.lr_decay),
      ('weight_decay', args.weight_decay),
      ('clip', args.clip),
      ('encoder', args.encoder),
      ('only_spatial', args.only_spatial),
      ('phrase_context', args.phrase_context),
      ('debug', args.debug_mode)])
  net = get_model(vocabs, config)

if not os.path.exists(args.save_path):
  os.makedirs(args.save_path)
snapshot_pfx   = 'snapshot.' + ".".join([key.upper()+str(config[key]) for key in config.keys() if key[0] != 'f'])
snapshot_model = os.path.join(args.save_path, snapshot_pfx + '.model')
experiment_log = open(os.path.join(args.save_path, snapshot_pfx + '.log'),'w')
out_file = os.path.join(args.save_path, snapshot_pfx + '.tst-eval.json')
print("="*20)
print("Starting training {} model".format(config['model']))
print("Snapshots {}.*\nDetails:".format(os.path.join(args.save_path, snapshot_pfx)))
for key in config:
  print("{} : {}".format(key,config[key]))
print("="*20)

Code Example #13
    def __init__(self):
        self.transforms = None
        self.index = 0
        self.prev_index = -1
        self.bridge = CvBridge()
        self.prev_seq = None
        self.just_started = True
        self.mode = rospy.get_param('~mode', 'stereo')
        self.plan = rospy.get_param('~plan', 0)
        self.return_mode = rospy.get_param('~return_mode', 0)
        print(self.mode, self.plan)
        params_file = 'real_sensor.json'

        # Planner
        self.planner = None
        if self.plan:
            self.planner = Planner(mode="real", params_file=params_file)

        #  Params
        with open(params_file) as f:
            self.param = json.load(f)
        self.param["d_candi"] = img_utils.powerf(self.param["s_range"],
                                                 self.param["e_range"], 64, 1.)

        # Gen Model Datum
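        # Intrinsics are scaled to quarter resolution; the homogeneous [2, 2] entry is reset to 1 after the division.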
        intrinsics = torch.tensor(self.param["intr_rgb"]).unsqueeze(0) / 4
        intrinsics[0, 2, 2] = 1.
        intrinsics_up = torch.tensor(self.param["intr_rgb"]).unsqueeze(0)
        s_width = self.param["size_rgb"][0] / 4
        s_height = self.param["size_rgb"][1] / 4
        focal_length = np.mean(
            [intrinsics_up[0, 0, 0], intrinsics_up[0, 1, 1]])
        h_fov = math.degrees(
            math.atan(intrinsics_up[0, 0, 2] / intrinsics_up[0, 0, 0]) * 2)
        v_fov = math.degrees(
            math.atan(intrinsics_up[0, 1, 2] / intrinsics_up[0, 1, 1]) * 2)
        pixel_to_ray_array = View.normalised_pixel_to_ray_array(
            width=int(s_width), height=int(s_height),
            hfov=h_fov, vfov=v_fov, normalize_z=True)
        pixel_to_ray_array_2dM = np.reshape(
            np.transpose(pixel_to_ray_array, axes=[2, 0, 1]), [3, -1])
        pixel_to_ray_array_2dM = torch.from_numpy(
            pixel_to_ray_array_2dM.astype(np.float32)).unsqueeze(0)
        left_2_right = torch.tensor(self.param["left_2_right"])
        if self.mode == "stereo" or self.mode == "stereo_lc":
            src_cam_poses = torch.cat(
                [left_2_right.unsqueeze(0),
                 torch.eye(4).unsqueeze(0)]).unsqueeze(0)
        elif self.mode == "mono" or self.mode == "mono_lc":
            src_cam_poses = torch.cat(
                [torch.eye(4).unsqueeze(0),
                 torch.eye(4).unsqueeze(0)]).unsqueeze(0)
        self.model_datum = dict()
        self.model_datum["intrinsics"] = intrinsics.cuda()
        self.model_datum["intrinsics_up"] = intrinsics_up.cuda()
        self.model_datum["unit_ray"] = pixel_to_ray_array_2dM.cuda()
        self.model_datum["src_cam_poses"] = src_cam_poses.cuda()
        self.model_datum["d_candi"] = self.param["d_candi"]
        self.model_datum["d_candi_up"] = self.param["d_candi"]
        self.model_datum["rgb"] = None
        self.model_datum["prev_output"] = None
        self.model_datum["prev_lc"] = None
        self.rgb_pinned = torch.zeros(
            (1, 2, 3, self.param["size_rgb"][1],
             self.param["size_rgb"][0])).float().pin_memory()
        self.dpv_pinned = torch.zeros(
            (1, 64, int(self.param["size_rgb"][1]),
             int(self.param["size_rgb"][0]))).float().pin_memory()
        self.pred_depth_pinned = torch.zeros(
            (int(self.param["size_rgb"][1]),
             int(self.param["size_rgb"][0]))).float().pin_memory()
        self.true_depth_pinned = torch.zeros(
            (int(self.param["size_rgb"][1]),
             int(self.param["size_rgb"][0]))).float().pin_memory()
        self.unc_pinned = torch.zeros(1, 64, int(
            self.param["size_rgb"][0])).float().pin_memory()
        __imagenet_stats = {'mean': [0.485, 0.456, 0.406],
                            'std': [0.229, 0.224, 0.225]}
        self.transformer = transforms.Normalize(**__imagenet_stats)

        # Load Model
        if self.mode == "stereo":
            model_name = 'default_stereo_ilim'
        elif self.mode == "mono":
            model_name = 'default_ilim'
        elif self.mode == "mono_lc":
            model_name = 'default_exp7_lc_ilim'
        elif self.mode == 'stereo_lc':
            model_name = 'default_stereo_exp7_lc_ilim'
        cfg_path = 'configs/' + model_name + '.json'
        model_path = ''
        with open(cfg_path) as f:
            self.cfg = EasyDict(json.load(f))
        self.model = get_model(self.cfg, 0)
        epoch, weights = load_checkpoint('outputs/checkpoints/' + model_name +
                                         '/' + model_name +
                                         '_model_best.pth.tar')
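        # The checkpoint's parameter names don't match this model, so remap the
        # weights positionally; this assumes both state_dicts enumerate tensors
        # in the same order.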
        from collections import OrderedDict
        new_weights = OrderedDict()
        model_keys = list(self.model.state_dict().keys())
        weight_keys = list(weights.keys())
        for a, b in zip(model_keys, weight_keys):
            new_weights[a] = weights[b]
        weights = new_weights
        self.model.load_state_dict(weights)
        self.model = self.model.cuda()
        self.model.eval()
        print("Model Loaded")

        # ROS
        self.q_msg = deque([], 1)
        lth = ConsumerThread(self.q_msg, self.handle_msg)
        lth.daemon = True  # setDaemon() is deprecated; assign the daemon attribute instead
        lth.start()
        self.queue_size = 3
        self.sync = functools.partial(ApproximateTimeSynchronizer, slop=0.01)
        self.left_camsub = message_filters.Subscriber(
            '/left_camera_resized/image_color_rect', sensor_msgs.msg.Image)
        self.right_camsub = message_filters.Subscriber(
            'right_camera_resized/image_color_rect', sensor_msgs.msg.Image)
        self.depth_sub = message_filters.Subscriber(
            '/left_camera_resized/depth', sensor_msgs.msg.Image
        )  # , queue_size=self.queue_size, buff_size=2**24
        self.ts = self.sync(
            [self.left_camsub, self.right_camsub, self.depth_sub],
            self.queue_size)
        self.ts.registerCallback(self.callback)
        self.prev_left_cammsg = None
        self.depth_pub = rospy.Publisher('ros_net/depth',
                                         sensor_msgs.msg.Image,
                                         queue_size=self.queue_size)
        self.depth_color_pub = rospy.Publisher('ros_net/depth_color',
                                               sensor_msgs.msg.Image,
                                               queue_size=self.queue_size)
        self.depth_lc_pub = rospy.Publisher('ros_net/depth_lc',
                                            sensor_msgs.msg.Image,
                                            queue_size=self.queue_size)
        self.dpv_pub = rospy.Publisher('ros_net/dpv_pub',
                                       TensorMsg,
                                       queue_size=self.queue_size)
        self.unc_pub = rospy.Publisher('ros_net/unc_pub',
                                       TensorMsg,
                                       queue_size=self.queue_size)
        self.debug_pub = rospy.Publisher('ros_net/debug',
                                         sensor_msgs.msg.Image,
                                         queue_size=self.queue_size)
        self.debug2_pub = rospy.Publisher('ros_net/debug2',
                                          sensor_msgs.msg.Image,
                                          queue_size=self.queue_size)
        self.sensed_pub = rospy.Publisher('ros_net/sensed_pub',
                                          TensorMsg,
                                          queue_size=self.queue_size)
Code Example #14
def main():
    setup_default_logging()
    args, args_text = _parse_args()

    args.prefetcher = not args.no_prefetcher
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
        if args.distributed and args.num_gpu > 1:
            logging.warning('Using more than one GPU per process in distributed mode is not allowed. Setting num_gpu to 1.')
            args.num_gpu = 1

    args.device = 'cuda:0'
    args.world_size = 1
    args.rank = 0  # global rank
    if args.distributed:
        args.num_gpu = 1
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        args.rank = torch.distributed.get_rank()
    assert args.rank >= 0

    if args.distributed:
        logging.info('Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
                     % (args.rank, args.world_size))
    else:
        logging.info('Training with a single process on %d GPUs.' % args.num_gpu)

    torch.manual_seed(args.seed + args.rank)

    # my model
    use_aux = args.aux_weight > 0.
    if args.model_method == 'darts_NAS':
        if args.genotype is None:
            args.genotype = get_model.get_model(args.model_method, args.model_name)
        model = AugmentCNN_ImageNet(224, 3, args.init_channels, args.num_classes, args.layers,
                                    use_aux, args.genotype)
    elif args.model_method == 'my_model_collection':
        from models.my_searched_model import my_specialized
        _ = args.model_name.split(':')
        net_config_path = os.path.join(project_path, 'models', 'my_model_collection',
                                       _[0], _[1] + '.json')
        model = my_specialized(num_classes=args.num_classes, net_config=net_config_path,
                               dropout_rate=args.drop)
    else:
        model_fun = get_model.get_model(args.model_method, args.model_name)
        model = model_fun(num_classes=args.num_classes, dropout_rate=args.drop)
    # set bn
    model.set_bn_param(args.bn_momentum, args.bn_eps)
    # model init
    model.init_model(model_init=args.model_init)
    # pdb.set_trace()
    # model = create_model(
    #     args.model,
    #     pretrained=args.pretrained,
    #     num_classes=args.num_classes,
    #     drop_rate=args.drop,
    #     drop_connect_rate=args.drop_connect,
    #     global_pool=args.gp,
    #     bn_tf=args.bn_tf,
    #     bn_momentum=args.bn_momentum,
    #     bn_eps=args.bn_eps,
    #     checkpoint_path=args.initial_checkpoint)

    if args.local_rank == 0:
        logging.info('Model %s created, param count: %d' %
                     (args.model_name, sum([m.numel() for m in model.parameters()])))

    data_config = resolve_data_config(vars(args), model=model, verbose=args.local_rank == 0)

    num_aug_splits = 0
    if args.aug_splits > 0:
        assert args.aug_splits > 1, 'A split of 1 makes no sense'
        num_aug_splits = args.aug_splits

    if args.split_bn:
        assert num_aug_splits > 1 or args.resplit
        model = convert_splitbn_model(model, max(num_aug_splits, 2))

    if args.num_gpu > 1:
        if args.amp:
            logging.warning(
                'AMP does not work well with nn.DataParallel, disabling. Use distributed mode for multi-GPU AMP.')
            args.amp = False
        model = nn.DataParallel(model, device_ids=list(range(args.num_gpu))).cuda()
    else:
        model.cuda()

    optimizer = create_optimizer(args, model)

    use_amp = False
    if has_apex and args.amp:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        use_amp = True
    if args.local_rank == 0:
        logging.info('NVIDIA APEX {}. AMP {}.'.format(
            'installed' if has_apex else 'not installed', 'on' if use_amp else 'off'))

    # optionally resume from a checkpoint
    resume_state = {}
    resume_epoch = None
    if args.resume:
        resume_state, resume_epoch = resume_checkpoint(model, args.resume)
    if resume_state and not args.no_resume_opt:
        if 'optimizer' in resume_state:
            if args.local_rank == 0:
                logging.info('Restoring Optimizer state from checkpoint')
            optimizer.load_state_dict(resume_state['optimizer'])
        if use_amp and 'amp' in resume_state and 'load_state_dict' in amp.__dict__:
            if args.local_rank == 0:
                logging.info('Restoring NVIDIA AMP state from checkpoint')
            amp.load_state_dict(resume_state['amp'])
    del resume_state

    model_ema = None
    if args.model_ema:
        # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper
        model_ema = ModelEma(
            model,
            decay=args.model_ema_decay,
            device='cpu' if args.model_ema_force_cpu else '',
            resume=args.resume)

    if args.distributed:
        if args.sync_bn:
            assert not args.split_bn
            try:
                if has_apex:
                    model = convert_syncbn_model(model)
                else:
                    model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
                if args.local_rank == 0:
                    logging.info(
                        'Converted model to use Synchronized BatchNorm. WARNING: You may have issues if using '
                        'zero initialized BN layers (enabled by default for ResNets) while sync-bn enabled.')
            except Exception as e:
                logging.error('Failed to enable Synchronized BatchNorm. Install Apex or Torch >= 1.1')
        if has_apex:
            model = DDP(model, delay_allreduce=True)
        else:
            if args.local_rank == 0:
                logging.info("Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP.")
            model = DDP(model, device_ids=[args.local_rank])  # can use device str in Torch >= 1.1
        # NOTE: EMA model does not need to be wrapped by DDP

    lr_scheduler, num_epochs = create_scheduler(args, optimizer)
    start_epoch = 0
    if args.start_epoch is not None:
        # a specified start_epoch will always override the resume epoch
        start_epoch = args.start_epoch
    elif resume_epoch is not None:
        start_epoch = resume_epoch
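    # Fast-forward the LR schedule so it matches the resumed epoch.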
    if lr_scheduler is not None and start_epoch > 0:
        lr_scheduler.step(start_epoch)

    if args.local_rank == 0:
        logging.info('Scheduled epochs: {}'.format(num_epochs))

    train_dir = os.path.join(args.data, 'train')
    if not os.path.exists(train_dir):
        logging.error('Training folder does not exist at: {}'.format(train_dir))
        exit(1)
    dataset_train = Dataset(train_dir)

    collate_fn = None
    if args.prefetcher and args.mixup > 0:
        assert not num_aug_splits  # collate conflict (need to support deinterleaving in collate mixup)
        collate_fn = FastCollateMixup(args.mixup, args.smoothing, args.num_classes)

    if num_aug_splits > 1:
        dataset_train = AugMixDataset(dataset_train, num_splits=num_aug_splits)

    loader_train = create_loader(
        dataset_train,
        input_size=data_config['input_size'],
        batch_size=args.batch_size,
        is_training=True,
        use_prefetcher=args.prefetcher,
        re_prob=args.reprob,
        re_mode=args.remode,
        re_count=args.recount,
        re_split=args.resplit,
        color_jitter=args.color_jitter,
        auto_augment=args.aa,
        num_aug_splits=num_aug_splits,
        interpolation=args.train_interpolation,
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        collate_fn=collate_fn,
        pin_memory=args.pin_mem,
    )

    eval_dir = os.path.join(args.data, 'val')
    if not os.path.isdir(eval_dir):
        eval_dir = os.path.join(args.data, 'validation')
        if not os.path.isdir(eval_dir):
            logging.error('Validation folder does not exist at: {}'.format(eval_dir))
            exit(1)
    dataset_eval = Dataset(eval_dir)

    loader_eval = create_loader(
        dataset_eval,
        input_size=data_config['input_size'],
        batch_size=args.validation_batch_size_multiplier * args.batch_size,
        is_training=False,
        use_prefetcher=args.prefetcher,
        interpolation=data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        crop_pct=data_config['crop_pct'],
        pin_memory=args.pin_mem,
    )

    if args.jsd:
        assert num_aug_splits > 1  # JSD only valid with aug splits set
        train_loss_fn = JsdCrossEntropy(num_splits=num_aug_splits, smoothing=args.smoothing).cuda()
        validate_loss_fn = nn.CrossEntropyLoss().cuda()
    elif args.mixup > 0.:
        # smoothing is handled with mixup label transform
        train_loss_fn = SoftTargetCrossEntropy().cuda()
        validate_loss_fn = nn.CrossEntropyLoss().cuda()
    elif args.smoothing:
        train_loss_fn = LabelSmoothingCrossEntropy(smoothing=args.smoothing).cuda()
        validate_loss_fn = nn.CrossEntropyLoss().cuda()
    else:
        train_loss_fn = nn.CrossEntropyLoss().cuda()
        validate_loss_fn = train_loss_fn

    eval_metric = args.eval_metric
    best_metric = None
    best_epoch = None
    saver = None
    output_dir = ''
    if args.local_rank == 0:
        output_base = args.output if args.output else './output'
        exp_name = '-'.join([
            datetime.now().strftime("%Y%m%d-%H%M%S"),
            args.model_method,
            args.model_name,
            str(data_config['input_size'][-1])
        ])
        output_dir = get_outdir(output_base, 'train', exp_name)
        decreasing = True if eval_metric == 'loss' else False
        saver = CheckpointSaver(checkpoint_dir=output_dir, decreasing=decreasing)
        with open(os.path.join(output_dir, 'args.yaml'), 'w') as f:
            f.write(args_text)

    try:
        for epoch in range(start_epoch, num_epochs):
            if args.distributed:
                loader_train.sampler.set_epoch(epoch)

            train_metrics = train_epoch(
                epoch, model, loader_train, optimizer, train_loss_fn, args,
                lr_scheduler=lr_scheduler, saver=saver, output_dir=output_dir,
                use_amp=use_amp, model_ema=model_ema)

            if args.distributed and args.dist_bn in ('broadcast', 'reduce'):
                if args.local_rank == 0:
                    logging.info("Distributing BatchNorm running means and vars")
                distribute_bn(model, args.world_size, args.dist_bn == 'reduce')

            eval_metrics = validate(model, loader_eval, validate_loss_fn, args)

            if model_ema is not None and not args.model_ema_force_cpu:
                if args.distributed and args.dist_bn in ('broadcast', 'reduce'):
                    distribute_bn(model_ema, args.world_size, args.dist_bn == 'reduce')

                ema_eval_metrics = validate(
                    model_ema.ema, loader_eval, validate_loss_fn, args, log_suffix=' (EMA)')
                eval_metrics = ema_eval_metrics

            if lr_scheduler is not None:
                # step LR for next epoch
                lr_scheduler.step(epoch + 1, eval_metrics[eval_metric])

            update_summary(
                epoch, train_metrics, eval_metrics, os.path.join(output_dir, 'summary.csv'),
                write_header=best_metric is None)

            if saver is not None:
                # save proper checkpoint with eval metric
                save_metric = eval_metrics[eval_metric]
                best_metric, best_epoch = saver.save_checkpoint(
                    model, optimizer, args,
                    epoch=epoch, model_ema=model_ema, metric=save_metric, use_amp=use_amp)

    except KeyboardInterrupt:
        pass
    if best_metric is not None:
        logging.info('*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch))
Code Example #15
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    # torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    if config.deterministic:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.enabled = True
    else:
        torch.backends.cudnn.benchmark = True

    # get data with meta info
    if config.data_loader_type == 'torch':
        input_size, input_channels, n_classes, train_data, valid_data = get_data.get_data(
            config.dataset, config.data_path, config.cutout_length,
            auto_augmentation=config.auto_augmentation)
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=config.batch_size,
                                                   shuffle=True,
                                                   num_workers=config.workers,
                                                   pin_memory=True)
        valid_loader = torch.utils.data.DataLoader(valid_data,
                                                   batch_size=config.batch_size,
                                                   shuffle=False,
                                                   num_workers=config.workers,
                                                   pin_memory=True)
    elif config.data_loader_type == 'dali':
        input_size, input_channels, n_classes, train_data, valid_data = get_data.get_data_dali(
            config.dataset, config.data_path, batch_size=config.batch_size, num_threads=config.workers)
        train_loader = train_data
        valid_loader = valid_data
    else:
        raise NotImplementedError

    if config.label_smoothing > 0:
        from utils import LabelSmoothLoss
        criterion = LabelSmoothLoss(smoothing=config.label_smoothing).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    use_aux = config.aux_weight > 0.
    if config.model_method == 'darts_NAS':
        if config.genotype is None:
            config.genotype = get_model.get_model(config.model_method, config.model_name)
        if 'imagenet' in config.dataset.lower():
            model = AugmentCNN_ImageNet(input_size, input_channels, config.init_channels, n_classes, config.layers,
                           use_aux, config.genotype)
        else:
            model = AugmentCNN(input_size, input_channels, config.init_channels, n_classes, config.layers,
                               use_aux, config.genotype)
    elif config.model_method == 'my_model_collection':
        from models.my_searched_model import my_specialized
        if config.structure_path is None:
            _ = config.model_name.split(':')
            net_config_path = os.path.join(project_path, 'models', 'my_model_collection',
                                           _[0], _[1] + '.json')
        else:
            net_config_path = config.structure_path
        model = my_specialized(num_classes=n_classes, net_config=net_config_path,
                               dropout_rate=config.dropout_rate)
    else:
        model_fun = get_model.get_model(config.model_method, config.model_name)
        model = model_fun(num_classes=n_classes, dropout_rate=config.dropout_rate)
    # set bn
    model.set_bn_param(config.bn_momentum, config.bn_eps)
    # model init
    model.init_model(model_init=config.model_init)
    model.cuda()
    # model size
    total_ops, total_params = flops_counter.profile(model, [1, input_channels, input_size, input_size])
    logger.info("Model size = {:.3f} MB".format(total_params))
    logger.info("Model FLOPS with input {} = {:.3f} M".format(str([1, input_channels, input_size, input_size]),
                                                              total_ops))
    total_ops, total_params = flops_counter.profile(model, [1, 3, 224, 224])
    logger.info("Model FLOPS with input [1,3,224,224] {:.3f} M".format(total_ops))

    model = nn.DataParallel(model).to(device)
    # weights optimizer
    if config.no_decay_keys != 'None':
        keys = config.no_decay_keys.split('#')
        optimizer = torch.optim.SGD([
            {'params': model.module.get_parameters(keys, mode='exclude'), 'weight_decay': config.weight_decay},
            {'params': model.module.get_parameters(keys, mode='include'), 'weight_decay': 0},
        ], lr=config.lr, momentum=config.momentum)
    else:
        optimizer = torch.optim.SGD(model.parameters(), config.lr, momentum=config.momentum,
                                    weight_decay=config.weight_decay)

    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.epochs)

    best_top1 = 0.
    # training loop
    _size = get_iterator_length(train_loader)
    for epoch in range(config.epochs):
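        # NOTE: stepping the scheduler at the top of the epoch follows the pre-1.1
        # PyTorch convention; newer releases expect step() after the optimizer updates.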
        lr_scheduler.step()
        if config.drop_path_prob > 0:
            drop_prob = config.drop_path_prob * epoch / config.epochs
            model.module.drop_path_prob(drop_prob)

        # training
        train(train_loader, model, optimizer, criterion, epoch)

        # validation
        cur_step = (epoch+1) * _size
        top1 = validate(valid_loader, model, criterion, epoch, cur_step)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
            logger.info("Current best Prec@1 = {:.4%}".format(best_top1))
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)

        print("")

    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
Code Example #16
class DatasetSplit(Dataset):
    def __init__(self, dataset, idxs):
        self.dataset = dataset
        self.idxs = list(idxs)

    def __len__(self):
        return len(self.idxs)

    def __getitem__(self, item):
        image, label = self.dataset[self.idxs[item]]
        return image, label


model = get_model(model_name,
                  dataset,
                  rand_seed=seed,
                  step_size=step_size,
                  device=device,
                  flatten_weight=flatten_weight)

stat = CollectStatistics(results_file_name=fl_results_file_path)
train_loader_list = []
dataiter_list = []
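# One loader (plus a persistent iterator for drawing individual batches) per federated client node.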
for n in range(n_nodes):
    train_loader_list.append(
        DataLoader(DatasetSplit(data_train, dict_users[n]),
                   batch_size=batch_size_train,
                   shuffle=True))
    dataiter_list.append(iter(train_loader_list[n]))

w_global_init = model.get_weight()
w_global = copy.deepcopy(w_global_init)
Code Example #17
def main():
    print("evaluate start")

    # set default gpu device id
    # torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    if config.deterministic:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.enabled = True
    else:
        torch.backends.cudnn.benchmark = True

    # get data with meta info
    if config.data_loader_type == 'torch':
        input_size, input_channels, n_classes, train_data, valid_data = get_data.get_data(
            config.dataset,
            config.data_path,
            config.cutout_length,
            auto_augmentation=config.auto_augmentation)
        # train_loader = torch.utils.data.DataLoader(train_data,
        #                                            batch_size=config.batch_size,
        #                                            shuffle=True,
        #                                            num_workers=config.workers,
        #                                            pin_memory=True)
        valid_loader = torch.utils.data.DataLoader(
            valid_data,
            batch_size=config.batch_size,
            shuffle=False,
            num_workers=config.workers,
            pin_memory=False)
    elif config.data_loader_type == 'dali':
        input_size, input_channels, n_classes, train_data, valid_data = get_data.get_data_dali(
            config.dataset,
            config.data_path,
            batch_size=config.batch_size,
            num_threads=config.workers)
        # train_loader = train_data
        valid_loader = valid_data
    else:
        raise NotImplementedError

    use_aux = config.aux_weight > 0.
    if config.model_method == 'darts_NAS':
        if config.genotype is None:
            config.genotype = get_model.get_model(config.model_method,
                                                  config.model_name)
        if 'imagenet' in config.dataset.lower():
            model = AugmentCNN_ImageNet(input_size, input_channels,
                                        config.init_channels, n_classes,
                                        config.layers, use_aux,
                                        config.genotype)
        else:
            model = AugmentCNN(input_size, input_channels,
                               config.init_channels, n_classes, config.layers,
                               use_aux, config.genotype)
    elif config.model_method == 'my_model_collection':
        from models.my_searched_model import my_specialized
        if config.structure_path is None:
            _ = config.model_name.split(':')
            net_config_path = os.path.join(project_path, 'models',
                                           'my_model_collection', _[0],
                                           _[1] + '.json')
        else:
            net_config_path = config.structure_path
        # model = my_specialized(num_classes=n_classes, net_config=net_config_path,
        #                        dropout_rate=config.dropout_rate)
        model = my_specialized(num_classes=n_classes,
                               net_config=net_config_path,
                               dropout_rate=0)
    else:
        model_fun = get_model.get_model(config.model_method, config.model_name)
        # model = model_fun(num_classes=n_classes, dropout_rate=config.dropout_rate)
        model = model_fun(num_classes=n_classes, dropout_rate=0)
    # load model
    ckpt = torch.load(config.pretrained)
    print(ckpt.keys())
    # for k in model:
    #     print(k)
    # return
    # set bn
    # model.set_bn_param(config.bn_momentum, config.bn_eps)
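    # Drop the 'total_ops'/'total_params' buffers left behind by FLOPs profiling before loading the EMA weights.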
    for _key in list(ckpt['state_dict_ema'].keys()):
        if 'total_ops' in _key or 'total_params' in _key:
            del ckpt['state_dict_ema'][_key]
    model.load_state_dict(ckpt['state_dict_ema'])
    # model init
    # model.init_model(model_init=config.model_init)
    model.cuda()
    # model size
    total_ops, total_params = flops_counter.profile(
        model, [1, input_channels, input_size, input_size])
    print("Model size = {:.3f} MB".format(total_params))
    print("Model FLOPS with input {} = {:.3f} M".format(
        str([1, input_channels, input_size, input_size]), total_ops))
    total_ops, total_params = flops_counter.profile(model, [1, 3, 224, 224])
    print("Model FLOPS with input [1,3,224,224] {:.3f} M".format(total_ops))

    model = nn.DataParallel(model).to(device)
    # CRITERION
    if config.label_smoothing > 0:
        from utils import LabelSmoothLoss
        criterion = LabelSmoothLoss(
            smoothing=config.label_smoothing).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    best_top1 = validate(valid_loader, model, criterion, 0, 0)

    print("Final best Prec@1 = {:.4%}".format(best_top1))
Code Example #18
def save_cams(config: Config):
    config_json = config.toDictionary()
    print('save_cams')
    print(config_json)
    import shutil
    import cv2
    import os
    import numpy as np
    from torch.utils.data.dataloader import DataLoader
    from data.loader_segmentation import Segmentation
    from artifacts.artifact_manager import artifact_manager

    # Set up model
    model = get_model(config.classifier_name)
    model.load()
    model.eval()
    model.to(model.device)

    # Set up data loader
    dataloader = DataLoader(Segmentation(
        config.classifier_dataset_root,
        source='train',
        augmentation='val',
        image_size=config.classifier_image_size,
        requested_labels=['classification', 'segmentation']),
                            batch_size=config.cams_produce_batch_size,
                            shuffle=False,
                            num_workers=4,
                            prefetch_factor=4)

    # Clear and create destination directory
    cam_path = os.path.join(artifact_manager.getDir(), 'cam')
    if os.path.exists(cam_path):
        shutil.rmtree(cam_path)
    os.makedirs(cam_path)

    label_cam_path = os.path.join(artifact_manager.getDir(), 'labels_cam')
    if os.path.exists(label_cam_path):
        shutil.rmtree(label_cam_path)
    os.makedirs(label_cam_path)

    for batch_no, batch in enumerate(dataloader):
        inputs_in = batch[0]
        labels_in = batch[1]
        datapacket_in = batch[2]

        # Run images through model and get raw cams
        with torch.no_grad():
            cams = model.event({
                'name': 'get_cam',
                'inputs': inputs_in,
                'labels': labels_in,
                'batch': batch_no + 1
            })

        # Save out cams
        for cam_no, cam in enumerate(cams):
            # Save out ground truth labels for testing the rest of the system
            if config.cams_save_gt_labels:
                cam = labels_in['segmentation'][cam_no][1:]
                cam = F.adaptive_avg_pool2d(cam, [32, 32]).numpy()

                for i in range(0, cam.shape[0]):
                    cam[i] = cv2.blur(cam[i], (3, 3))
                    cam[i] = cv2.blur(cam[i], (3, 3))

            # Disregard false positives
            gt_mask = labels_in['classification'][cam_no].numpy()
            gt_mask[gt_mask > 0.5] = 1
            gt_mask[gt_mask <= 0.5] = 0
            gt_mask = np.expand_dims(np.expand_dims(gt_mask, -1), -1)
            cam *= gt_mask

            # Scale CAM to match input size
            cam = np.moveaxis(cam, 0, -1)
            cam = cv2.resize(
                cam,
                (config.classifier_image_size, config.classifier_image_size),
                interpolation=cv2.INTER_LINEAR)
            cam = np.moveaxis(cam, -1, 0)

            # - Cut CAM from input size and upscale to original image size
            width = datapacket_in['width'][cam_no].detach().numpy()
            height = datapacket_in['height'][cam_no].detach().numpy()
            content_width = datapacket_in['content_width'][cam_no].detach().numpy()
            content_height = datapacket_in['content_height'][cam_no].detach().numpy()
            cam = cam[:, 0:content_height, 0:content_width]
            cam = np.moveaxis(cam, 0, -1)
            cam = cv2.resize(cam, (width, height),
                             interpolation=cv2.INTER_LINEAR)
            cam = np.moveaxis(cam, -1, 0)

            # Normalize each cam map to between 0 and 1
            cam_max = np.max(cam, (1, 2), keepdims=True)
            cam_norm = cam / (cam_max + 1e-5)

            cam_bg = (
                1 -
                np.max(cam_norm, axis=0, keepdims=True))**config.cams_bg_alpha
            cam_with_bg = np.concatenate((cam_bg, cam_norm), axis=0)
            label_cam = label_to_image(cam_with_bg)

            # Collapse cam from 3d into long 2d
            cam_norm = np.reshape(
                cam_norm,
                (cam_norm.shape[0] * cam_norm.shape[1], cam_norm.shape[2]))
            cam_norm[cam_norm > 1] = 1
            cam_norm[cam_norm < 0] = 0
            label_cam[label_cam > 1] = 1
            label_cam[label_cam < 0] = 0

            # Write image
            img_no = datapacket_in['image_name'][cam_no]
            cv2.imwrite(
                os.path.join(cam_path, img_no) + '.png', cam_norm * 255)
            cv2.imwrite(
                os.path.join(label_cam_path, img_no) + '.png', label_cam * 255)
            print('Save cam : ', img_no, end='\r')
    print('')
Code Example #19
import socket
import time

import numpy as np

from control_algorithm.adaptive_tau import ControlAlgAdaptiveTauServer
from data_reader.data_reader import get_data
from models.get_model import get_model
from statistic.collect_stat import CollectStatistics
from util.utils import send_msg, recv_msg, get_indices_each_node_case

# Configurations are in a separate config.py file
from config import *

model = get_model(model_name)
if hasattr(model, 'create_graph'):
    model.create_graph(learning_rate=step_size)

if time_gen is not None:
    use_fixed_averaging_slots = True
else:
    use_fixed_averaging_slots = False

if batch_size < total_data:   # Read all data once when using stochastic gradient descent
    train_image, train_label, test_image, test_label, train_label_orig = get_data(dataset, total_data, dataset_file_path)

    # This function takes a long time to complete,
    # putting it outside of the sim loop because there is no randomness in the current way of computing the indices
    indices_each_node_case = get_indices_each_node_case(n_nodes, MAX_CASE, train_label_orig)

listening_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
Code Example #20
def train_net(args, logger, seed):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    logger.info('seed={}'.format(seed))

    # init seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    # cudnn.benchmark = True
    cudnn.benchmark = False
    cudnn.deterministic = True  # cudnn

    writer = SummaryWriter(args.outpath)
    start_epoch = 0
    val_best_acc = 0
    val_best_acc_index = 0

    # data_loader
    train_loader, val_loader, target_class_num, dataset_sizes = \
        get_target_dataloader(args.target_dataset, args.batch_size, args.num_workers, args.target_data_dir,
                              image_size=args.image_size, data_aug=args.data_aug, logger=logger)

    # model setting
    model_source, model_target = get_model(args.base_model_name,
                                           args.base_task, logger, args)

    # target_model split: (feature, classifier)
    model_feature, model_source_classifier, model_target_classifier = \
        model_split(args.base_model_name, model_target, target_class_num, logger, args)

    if len(args.gpu_id) > 1:
        model_source = nn.DataParallel(model_source)
        model_feature = nn.DataParallel(model_feature)
        model_source_classifier = nn.DataParallel(model_source_classifier)
        model_target_classifier = nn.DataParallel(model_target_classifier)
        model_source = model_source.cuda()
        model_feature = model_feature.cuda()
        model_target_classifier = model_target_classifier.cuda()
        model_source_classifier = model_source_classifier.cuda()
        logger.info("push all model to dataparallel and then gpu")
    else:
        model_source = model_source.cuda()
        model_feature = model_feature.cuda()
        model_target_classifier = model_target_classifier.cuda()
        model_source_classifier = model_source_classifier.cuda()
        logger.info("push all model to gpu")

    # iterations -> epochs
    num_epochs = int(np.round(args.max_iter * args.batch_size / dataset_sizes))
    step = [int(0.67 * num_epochs)]
    logger.info('num_epochs={}, step={}'.format(num_epochs, step))

    # loss
    loss_fn = get_loss_type(loss_type=args.loss_type, logger=logger)

    # get feature_criterions
    if args.reg_type in ['channel_att_fea_map_learn', 'fea_loss']:
        feature_criterions = get_reg_criterions(args, logger)
    else:
        feature_criterions = None  # avoid a NameError when no feature regularizer is configured

    # optimizer and lr_scheduler
    optimizer, lr_scheduler = get_optimier_and_scheduler(
        args, model_feature, model_target_classifier, feature_criterions, step,
        logger)

    # init framework
    framework = TransferFramework(args,
                                  train_loader,
                                  val_loader,
                                  target_class_num,
                                  args.data_aug,
                                  args.base_model_name,
                                  model_source,
                                  model_feature,
                                  model_source_classifier,
                                  model_target_classifier,
                                  feature_criterions,
                                  loss_fn,
                                  num_epochs,
                                  optimizer,
                                  lr_scheduler,
                                  writer,
                                  logger,
                                  print_freq=args.print_freq)

    # epochs
    for epoch in range(start_epoch, num_epochs):
        # train epoch
        clc_loss, kl_loss, fea_loss, train_total_loss, train_top1_acc = framework.train(
            epoch)
        # val epoch
        val_loss, val_top1_acc = framework.val(epoch)
        # record into txt
        ours_record_epoch_data(args.outpath, epoch, clc_loss, kl_loss,
                               fea_loss, train_total_loss, train_top1_acc,
                               val_loss, val_top1_acc)

        if val_top1_acc >= val_best_acc:
            val_best_acc = val_top1_acc
            val_best_acc_index = epoch
            # save_checkpoint
            save_checkpoint(args.outpath, epoch, model_feature,
                            model_source_classifier, model_target_classifier,
                            optimizer, lr_scheduler, val_best_acc)

        logger.info(
            '||==>Val Epoch: Val_best_acc_index={}\tVal_best_acc={:.4f}\n'.
            format(val_best_acc_index, val_best_acc))
        # break
    return val_best_acc
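# The iteration-to-epoch conversion in train_net above is easy to sanity-check;
# a worked example with assumed values (not from the original):
import numpy as np

max_iter, batch_size, dataset_size = 9000, 64, 5000  # assumed values
num_epochs = int(np.round(max_iter * batch_size / dataset_size))
step = [int(0.67 * num_epochs)]  # LR decay at roughly two-thirds of training
print(num_epochs, step)  # 115 [77]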
Code Example #21
# Generate color palette
palette = {0: (0, 0, 0)}
for k, color in enumerate(sns.color_palette("hls", N_CLASSES + 1)):
    palette[k + 1] = tuple(np.asarray(255 * np.array(color), dtype='uint8'))


def color_results(arr2d, palette):
    """Map a 2-D array of class indices to an RGB image using `palette`."""
    arr_3d = np.zeros((arr2d.shape[0], arr2d.shape[1], 3), dtype=np.uint8)
    for label, color in palette.items():
        arr_3d[arr2d == label] = color
    return arr_3d
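# A quick usage sketch of color_results with toy data (the labels and palette
# below are assumptions, not from the original):
toy_labels = np.array([[0, 1], [2, 1]])
toy_palette = {0: (0, 0, 0), 1: (255, 0, 0), 2: (0, 255, 0)}
rgb = color_results(toy_labels, toy_palette)
print(rgb.shape)   # (2, 2, 3)
print(rgb[0, 1])   # [255   0   0]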


# Load model and weights
model = get_model(MODEL, args.dataset, N_CLASSES, N_BANDS, PATCH_SIZE)
print('Loading weights from %s' % (WEIGHTS + '/model_best.pth'))
model = model.to(device)
model.load_state_dict(torch.load(WEIGHTS + '/model_best.pth'))
model.eval()

#testing model
probabilities = test(model, WEIGHTS, img, PATCH_SIZE, N_CLASSES, device=device)
prediction = np.argmax(probabilities, axis=-1)

run_results = metrics(prediction, gt, n_classes=N_CLASSES)

prediction[gt < 0] = -1

#color results
colored_gt = color_results(gt + 1, palette)
Code Example #22
def save_semseg(
    dataset_root,
    model_name,
    batch_size=8,
    image_size=256,
    use_gt_labels=False,
):
    print('Save semseg : ', locals())
    import shutil
    import cv2
    import os
    import numpy as np
    import torch.nn.functional as F
    import torch
    from models.get_model import get_model
    from torch.utils.data.dataloader import DataLoader
    from data.loader_segmentation import Segmentation
    from artifacts.artifact_manager import artifact_manager
    from data.voc2012 import label_to_image

    # Set up model
    model = get_model(model_name)
    model.load()
    model.to(model.device)
    model.train(False)

    # Set up data loader
    dataloader = DataLoader(
        Segmentation(dataset_root,
                     source='val',
                     source_augmentation='val',
                     image_size=image_size,
                     requested_labels=['classification', 'segmentation']),
        batch_size=batch_size,
        shuffle=False,
        num_workers=4,
        prefetch_factor=4,
    )

    # Clear and create destination directory
    semseg_path = os.path.join(artifact_manager.getDir(), 'semseg_output')
    if os.path.exists(semseg_path):
        shutil.rmtree(semseg_path)
    os.makedirs(semseg_path)

    for batch_no, batch in enumerate(dataloader):
        inputs_in = batch[0]
        labels_in = batch[1]
        datapacket_in = batch[2]

        # Run images through model and get raw cams
        with torch.no_grad():
            semsegs = model.event({
                'name': 'get_semseg',
                'inputs': inputs_in,
                'labels': labels_in,
                'batch': batch_no + 1
            })

            semsegs = semsegs.detach().cpu().numpy()

        # Save out cams
        for semseg_no, semseg in enumerate(semsegs):
            # Save out ground truth labels for testing the rest of the system
            if use_gt_labels:
                semseg = labels_in['segmentation'][semseg_no][1:]
                semseg = F.adaptive_avg_pool2d(semseg, [32, 32]).numpy()

                for i in range(0, semseg.shape[0]):
                    semseg[i] = cv2.blur(semseg[i], (3, 3))
                    semseg[i] = cv2.blur(semseg[i], (3, 3))

            # # Disregard false positives
            # gt_mask = labels_in['classification'][semseg_no].numpy()
            # gt_mask[gt_mask > 0.5] = 1
            # gt_mask[gt_mask <= 0.5] = 0
            # gt_mask = np.expand_dims(np.expand_dims(gt_mask, -1), -1)
            # cam *= gt_mask

            # Upsample CAM to original image size
            # - Calculate original image aspect ratio
            width = int(datapacket_in['width'][semseg_no])   # plain ints for cv2.resize
            height = int(datapacket_in['height'][semseg_no])
            aspect_ratio = width / height

            # - Calculate width and height to cut from upscaled CAM
            if aspect_ratio > 1:
                cut_width = image_size
                cut_height = round(image_size / aspect_ratio)
            else:
                cut_width = round(image_size * aspect_ratio)
                cut_height = image_size

            # - Upscale CAM to match input size
            semseg = np.moveaxis(semseg, 0, -1)
            semseg = cv2.resize(semseg, (image_size, image_size),
                                interpolation=cv2.INTER_LINEAR)
            semseg = np.moveaxis(semseg, -1, 0)

            # - Cut CAM from input size and upscale to original image size
            semseg = semseg[:, 0:cut_height, 0:cut_width]
            semseg = np.moveaxis(semseg, 0, -1)
            semseg = cv2.resize(semseg, (width, height),
                                interpolation=cv2.INTER_LINEAR)
            semseg = np.moveaxis(semseg, -1, 0)

            semseg_as_label = label_to_image(semseg)

            # Write image
            image_name = datapacket_in['image_name'][semseg_no]
            cv2.imwrite(
                os.path.join(semseg_path, image_name) + '.png',
                semseg_as_label * 255)
            print('Save semseg : ', image_name, end='\r')
    print('')
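# The resize-then-crop step in save_semseg compensates for the square model
# input; a worked check with assumed dimensions (not from the original):
image_size, width, height = 256, 500, 375  # assumed: landscape 4:3 image
aspect_ratio = width / height              # ~1.33 > 1
cut_width, cut_height = image_size, round(image_size / aspect_ratio)
print(cut_width, cut_height)  # 256 192 -> cut, then resize back to 500x375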
Code Example #23
def save_labels(
    dataset_root,
    model_name,
    batch_size=8,
    image_size=256,
    use_gt_labels=False,
):
    print('Save cams : ', locals())
    import shutil
    import cv2
    import os
    import numpy as np
    import torch
    import torch.nn.functional as F
    from torch.utils.data.dataloader import DataLoader
    from data.loader_segmentation import Segmentation
    from artifacts.artifact_manager import artifact_manager

    # Set up model
    model = get_model(model_name)
    model.load()
    model.to(model.device)
    model.train(False)  # inference only, matching the other save_* functions

    # Set up data loader
    dataloader = DataLoader(
        Segmentation(
            dataset_root,
            source='trainval',
            source_augmentation='val',
            image_size=image_size,
            requested_labels=['classification', 'segmentation']
        ),
        batch_size=batch_size,
        shuffle=False,
        num_workers=4,
    )

    # Clear and create destination directory
    cam_path = os.path.join(artifact_manager.getDir(), 'cam')
    if os.path.exists(cam_path):
        shutil.rmtree(cam_path)
    os.makedirs(cam_path)

    for batch_no, batch in enumerate(dataloader):
        inputs_in = batch[0]
        labels_in = batch[1]
        datapacket_in = batch[2]

        # Run images through model and get raw cams
        with torch.no_grad():
            cams = model.event({
                'name': 'get_cam',
                'inputs': inputs_in,
                'labels': labels_in,
                'batch': batch_no+1
            })

        # Save out cams
        for cam_no, cam in enumerate(cams):
            # Save out ground truth labels for testing the rest of the system
            if use_gt_labels:
                cam = labels_in['segmentation'][cam_no][1:]
                cam = F.adaptive_avg_pool2d(cam, [32, 32]).numpy()

                for i in range(0, cam.shape[0]):
                    cam[i] = cv2.blur(cam[i], (3, 3))
                    cam[i] = cv2.blur(cam[i], (3, 3))

            # Disregard false positives
            gt_mask = labels_in['classification'][cam_no].numpy()
            gt_mask[gt_mask > 0.5] = 1
            gt_mask[gt_mask <= 0.5] = 0
            gt_mask = np.expand_dims(np.expand_dims(gt_mask, -1), -1)
            cam *= gt_mask

            # Upsample CAM to original image size
            # - Calculate original image aspect ratio
            width = int(datapacket_in['width'][cam_no])   # plain ints for cv2.resize
            height = int(datapacket_in['height'][cam_no])
            aspect_ratio = width / height

            # - Calculate width and height to cut from upscaled CAM
            if aspect_ratio > 1:
                cut_width = image_size
                cut_height = round(image_size / aspect_ratio)
            else:
                cut_width = round(image_size * aspect_ratio)
                cut_height = image_size

            # - Upscale CAM to match input size
            cam = np.moveaxis(cam, 0, -1)
            cam = cv2.resize(cam, (image_size, image_size), interpolation=cv2.INTER_LINEAR)
            cam = np.moveaxis(cam, -1, 0)

            # - Cut CAM from input size and upscale to original image size 
            cam = cam[:, 0:cut_height, 0:cut_width]
            cam = np.moveaxis(cam, 0, -1)
            cam = cv2.resize(cam, (width, height), interpolation=cv2.INTER_LINEAR)
            cam = np.moveaxis(cam, -1, 0)

            # Normalize each cam map to between 0 and 1
            cam_max = np.max(cam, (1, 2), keepdims=True)
            cam_norm = cam / (cam_max + 1e-5)

            # Collapse cam from 3d into long 2d
            cam_norm = np.reshape(cam_norm, (cam_norm.shape[0] * cam_norm.shape[1], cam_norm.shape[2]))
            cam_norm[cam_norm > 1] = 1
            cam_norm[cam_norm < 0] = 0

            # Write image
            image_name = datapacket_in['image_name'][cam_no]
            cv2.imwrite(os.path.join(cam_path, image_name) + '.png', cam_norm * 255)
            print('Save cam : ', image_name, end='\r')
    print('')
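# A small sketch of the per-class CAM normalization and 2-D stacking used in
# save_labels above (the toy tensor is an assumption):
import numpy as np

cam = np.random.rand(3, 4, 4).astype(np.float32)  # (classes, H, W)
cam_max = np.max(cam, (1, 2), keepdims=True)      # per-class maximum, shape (3, 1, 1)
cam_norm = cam / (cam_max + 1e-5)                 # each class map now peaks near 1
flat = np.reshape(cam_norm, (cam_norm.shape[0] * cam_norm.shape[1], cam_norm.shape[2]))
print(flat.shape)  # (12, 4): class maps stacked vertically into one image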
Code Example #24
        model_name = msg[1]
        dataset = msg[2]
        num_iterations_with_same_minibatch_for_tau_equals_one = msg[3]
        step_size = msg[4]
        batch_size = msg[5]
        total_data = msg[6]
        control_alg_server_instance = msg[7]
        indices_this_node = msg[8]
        read_all_data_for_stochastic = msg[9]
        use_min_loss = msg[10]
        sim = msg[11]
        c_comp = msg[12]

        w_file += str(indices_this_node)
        model = get_model(model_name)
        model2 = get_model(
            model_name
        )  # Used for computing loss_w_prev_min_loss for stochastic gradient descent,
        # so that the state of model can be still used by control algorithm later.

        if hasattr(model, 'create_graph'):
            model.create_graph(learning_rate=step_size)
        if hasattr(model2, 'create_graph'):
            model2.create_graph(learning_rate=step_size)

        # Assume the dataset does not change
        if read_all_data_for_stochastic or batch_size >= total_data:
            if batch_size_prev != batch_size or total_data_prev != total_data or (
                    batch_size >= total_data and sim_prev != sim):
                print('Reading all data samples used in training...')
Code Example #25
File: train.py  Project: FrozenAir/yolo_nano
import torch

from options.train_options import TrainOptions
from data.get_dataset import get_dataset
from models.get_model import get_model


if __name__ == '__main__':
    opt = TrainOptions().parse() # get training options
    dataset = get_dataset(opt)
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=opt.batch_size,
        shuffle=True,
        collate_fn=dataset.collate_fn
    )

    model = get_model(opt)
    if len(opt.gpu_ids) > 0:
        model = model.to(opt.device)  # move the model to the device once, not every batch
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)

    num_batches = len(dataloader)  # len(dataloader) is already the number of batches
    for epoch in range(opt.start_epochs, opt.start_epochs+opt.epochs):
        for i, (img_path, imgs, targets) in enumerate(dataloader):
            if len(opt.gpu_ids) > 0:
                imgs = imgs.to(opt.device)
                if targets is not None:
                    targets = targets.to(opt.device)  # Variable is deprecated; plain tensors suffice
            
            model.train()
            loss, yolo_outputs = model(imgs, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

Code Example #26
def save_cams_random_walk(config: Config):
    config_json = config.toDictionary()
    print('save_cams_random_walk')
    print(config_json)
    import shutil
    import os
    import cv2
    import numpy as np
    import torch
    import torch.nn.functional as F
    from torch.utils.data.dataloader import DataLoader
    from data.loader_segmentation import Segmentation
    from artifacts.artifact_manager import artifact_manager
    from data.voc2012 import label_to_image

    # Set up model
    model = get_model(config.affinity_net_name)
    model.load()
    model.eval()
    model.to(model.device)

    # Set up data loader
    dataloader = DataLoader(Segmentation(
        config.classifier_dataset_root,
        source='train',
        augmentation='affinity_predict',
        image_size=config.affinity_net_image_size,
        requested_labels=['classification', 'segmentation']),
                            batch_size=1,
                            shuffle=False,
                            num_workers=2,
                            prefetch_factor=2)

    # Get cam source directory
    cam_path = os.path.join(artifact_manager.getDir(), 'cam')

    # Clear and create output directory
    labels_rw_path = os.path.join(artifact_manager.getDir(), 'labels_rw')
    if os.path.exists(labels_rw_path):
        shutil.rmtree(labels_rw_path)
    os.makedirs(labels_rw_path)

    count = 0

    for batch_no, batch in enumerate(dataloader):
        inputs = batch[0]
        labels = batch[1]
        datapacket = batch[2]

        for image_no, image_name in enumerate(datapacket['image_name']):
            image = inputs['image'].cuda(non_blocking=True)
            image_width = int(datapacket['width'][image_no])
            image_height = int(datapacket['height'][image_no])
            channels = labels['classification'].shape[1]

            # Pad image
            image_width_padded = int(np.ceil(image_width / 8) * 8)
            image_height_padded = int(np.ceil(image_height / 8) * 8)
            image_padded = F.pad(image,
                                 (0, image_width_padded - image_width, 0,
                                  image_height_padded - image_height))

            image_width_pooled = int(np.ceil(image_width_padded / 8))
            image_height_pooled = int(np.ceil(image_height_padded / 8))

            # Load cam
            cam_path_instance = os.path.join(cam_path, image_name + '.png')
            cam = cv2.imread(cam_path_instance, cv2.IMREAD_GRAYSCALE)
            cam = np.reshape(cam, (channels, image_height, image_width))
            cam = cam / 255.0

            # Build cam background
            cam_background = (1 - np.max(cam, axis=0, keepdims=True)) ** config.affinity_net_bg_alpha
            cam = np.concatenate((cam_background, cam), axis=0)
            cam = cam.astype(np.float32)

            # Pad cam
            cam_padded_width = int(np.ceil(cam.shape[2] / 8) * 8)
            cam_padded_height = int(np.ceil(cam.shape[1] / 8) * 8)
            cam_padded = np.pad(cam,
                                ((0, 0), (0, cam_padded_height - image_height),
                                 (0, cam_padded_width - image_width)),
                                mode='constant')

            # Run images through model and get affinity matrix
            with torch.no_grad():
                aff_mat = model.event({
                    'name': 'infer_aff_net_dense',
                    'image': image_padded,
                })
                aff_mat = torch.pow(aff_mat, config.affinity_net_beta)

            trans_mat = aff_mat / torch.sum(aff_mat, dim=0, keepdim=True)
            for _ in range(config.affinity_net_log_t):
                trans_mat = torch.matmul(trans_mat, trans_mat)

            cam_pooled = F.avg_pool2d(torch.from_numpy(cam_padded), 8, 8)

            cam_vec = cam_pooled.view(21, -1)  # 21 = 20 VOC classes + background

            cam_rw = torch.matmul(cam_vec.cuda(), trans_mat)
            cam_rw = cam_rw.view(1, 21, image_height_pooled,
                                 image_width_pooled)

            cam_rw = torch.nn.Upsample(
                (image_height_padded, image_width_padded),
                mode='bilinear')(cam_rw)
            cam_rw = cam_rw.cpu().data[0, :, :image_height, :image_width]

            label_rw = label_to_image(cam_rw)

            cv2.imwrite(os.path.join(labels_rw_path, image_name + '.png'),
                        label_rw * 255)

            count += 1
            print('Save label : ', count, end='\r')

    print('')
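# In save_cams_random_walk, squaring trans_mat log_t times applies 2**log_t
# random-walk steps at once; a minimal sketch of that identity (toy matrix):
import torch

T = torch.rand(5, 5)
T = T / T.sum(dim=0, keepdim=True)  # column-normalized transition matrix
M = T.clone()
log_t = 3
for _ in range(log_t):
    M = M @ M                       # after the loop, M == T^(2**log_t)
assert torch.allclose(M, torch.matrix_power(T, 2 ** log_t), atol=1e-4)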