Example 1
    def __init__(self, model, ema, device, cfg, train_loader, val_loader,
                 logger):
        self.config = cfg
        self.epoch = 0
        self.train_loader = train_loader
        self.val_loader = val_loader

        self.base_dir = f'{self.config.OUTPUT_DIR}'
        if not os.path.exists(self.base_dir):
            os.makedirs(self.base_dir)

        self.logger = logger
        self.best_final_loss = 9999.0

        self.model = model
        self.device = device
        self.model.to(self.device)
        self.ema = ema.to(self.device)
        self.loss = torch.nn.MSELoss(reduction='mean')  # 'reduce'/'size_average' are deprecated
        self.optimizer = make_optimizer(cfg, model)

        self.scheduler = make_scheduler(cfg, self.optimizer, train_loader)

        self.logger.info(f'Fitter prepared. Device is {self.device}')
        self.early_stop_epochs = 0
        self.early_stop_patience = self.config.SOLVER.EARLY_STOP_PATIENCE
        self.do_scheduler = True
        self.logger.info("Start training")
Example 2
    def __init__(self, model, device, cfg, train_loader, val_loader, logger):
        self.config = cfg
        self.epoch = 0
        self.train_loader = train_loader
        self.val_loader = val_loader

        self.base_dir = f'{self.config.OUTPUT_DIR}'
        if not os.path.exists(self.base_dir):
            os.makedirs(self.base_dir)

        self.logger = logger
        self.best_final_score = 0.0
        self.best_score_threshold = 0.5

        self.model = model
        self.device = device
        self.model.to(self.device)

        self.optimizer = make_optimizer(cfg, model)

        self.scheduler = make_scheduler(cfg, self.optimizer, train_loader)

        self.logger.info(f'Fitter prepared. Device is {self.device}')
        self.all_predictions = []
        self.early_stop_epochs = 0
        self.early_stop_patience = self.config.SOLVER.EARLY_STOP_PATIENCE
        self.do_scheduler = True
        self.logger.info("Start training")
Example 3
 def test_something(self):
     net = nn.Linear(10, 10)
     optimizer = make_optimizer(cfg, net)
     lr_scheduler = WarmupMultiStepLR(optimizer, [20, 40], warmup_iters=10)
     for i in range(50):
         for j in range(3):
             print(i, lr_scheduler.get_lr()[0])
             optimizer.step()
         # step the scheduler after the optimizer update (PyTorch >= 1.1 ordering)
         lr_scheduler.step()
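Example 3 drives a WarmupMultiStepLR scheduler whose definition is not included in these snippets. Below is a minimal sketch of the usual maskrcnn-benchmark-style implementation (linear warmup for warmup_iters steps, then multiply the base LR by gamma at each milestone); the default values are assumptions.

from bisect import bisect_right
import torch

class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler):
    def __init__(self, optimizer, milestones, gamma=0.1,
                 warmup_factor=1.0 / 3, warmup_iters=500, last_epoch=-1):
        self.milestones = list(milestones)
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        # Linearly ramp the LR during warmup, then step-decay it at each milestone.
        warmup = 1.0
        if self.last_epoch < self.warmup_iters:
            alpha = float(self.last_epoch) / self.warmup_iters
            warmup = self.warmup_factor * (1 - alpha) + alpha
        return [
            base_lr * warmup * self.gamma ** bisect_right(self.milestones, self.last_epoch)
            for base_lr in self.base_lrs
        ]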
Example 4
    def __init__(self, cfg, logger, writer):
        self.cfg, self.logger, self.writer = cfg, logger, writer
        # Define dataloader
        self.tng_dataloader, self.val_dataloader, self.num_classes, self.num_query = get_dataloader(
            cfg)
        # networks
        self.model = build_model(cfg, self.num_classes)
        # loss function
        self.ce_loss = nn.CrossEntropyLoss()
        self.triplet = TripletLoss(cfg.SOLVER.MARGIN)
        # optimizer and scheduler
        self.opt = make_optimizer(self.cfg, self.model)
        self.lr_sched = make_lr_scheduler(self.cfg, self.opt)

        self._construct()
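Several of the trainers here pair make_optimizer with a make_lr_scheduler(cfg, optimizer) factory. A minimal sketch, assuming the WarmupMultiStepLR class sketched after Example 3 and config keys SOLVER.STEPS, SOLVER.GAMMA, SOLVER.WARMUP_FACTOR and SOLVER.WARMUP_ITERS (the quoted repositories may use different names or scheduler types):

def make_lr_scheduler(cfg, optimizer):
    # STEPS are the decay milestones, counted in whatever unit step() is called
    # with (iterations in most of these examples, epochs in others).
    return WarmupMultiStepLR(
        optimizer,
        cfg.SOLVER.STEPS,
        gamma=cfg.SOLVER.GAMMA,
        warmup_factor=cfg.SOLVER.WARMUP_FACTOR,
        warmup_iters=cfg.SOLVER.WARMUP_ITERS,
    )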
Example 5
def train(cfg):
    data_rows_num = get_data_rows_num(cfg)

    k_fold = KFold(n_splits=10, shuffle=True, random_state=1)
    for n_fold, (train_idx, val_idx) in enumerate(
            k_fold.split([i for i in range(1, data_rows_num)]), start=1):
        # rebuild the model for every fold so weights do not leak between folds
        model = build_model(cfg)
        optimizer = make_optimizer(cfg, model)
        train_loader = make_data_loader(cfg, train_idx, is_train=True)
        val_loader = make_data_loader(cfg, val_idx, is_train=True)
        loss_functions = [bce_with_logits_loss, bce_with_logits_loss]
        do_train(cfg, model, train_loader, val_loader, optimizer,
                 loss_functions, n_fold)
Example 6
    def __init__(self, cfg, logger, writer):
        self.cfg, self.logger, self.writer = cfg, logger, writer
        # Define dataloader
        self.tng_dataloader, self.val_dataloader_collection, self.num_classes, self.num_query_len_collection = get_dataloader(cfg)
        if 'InsDis' in list(self.cfg.SOLVER.LOSSTYPE):
            self.tng_dataloader, self.val_dataloader, self.num_classes, self.num_query = get_ins_dataloader(cfg)
        # networks
        self.model = build_model(cfg, self.num_classes, use_mask=False)
        # loss function
        self.ce_loss = nn.CrossEntropyLoss()
        self.triplet = TripletLoss(cfg.SOLVER.MARGIN)
        self.NCEContrast = NCEAverage(128, len(self.tng_dataloader)*cfg.SOLVER.IMS_PER_BATCH, cfg.NCE.K, cfg.NCE.T, cfg.NCE.M)
        self.NCELoss = NCECriterion(len(self.tng_dataloader))
        # optimizer and scheduler
        self.opt = make_optimizer(self.cfg, self.model)
        self.lr_sched = make_lr_scheduler(self.cfg, self.opt)

        self._construct()
Example 7
 def __init__(self, cfg):
     self.cfg = cfg.clone()
     num_gpus = int(
         os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
     self.logger = setup_logger("deformconv RCNN", 'workspace/logger', 0)
     self.logger.info("Using {} GPUs".format(num_gpus))
     self.logger.info("Collecting env info (might take some time)")
     self.logger.info("\n" + collect_env_info())
     self.logger.info("Running with config:\n{}".format(cfg))
     self.device = torch.device(cfg.MODEL.DEVICE)
     self.model = DeformConvRCNN(cfg).to(self.device)
     # load pretrained weights into the backbone's stem and layer1
     [*self.model.backbone.modules()][1].stem.load_state_dict(
         torch.load(cfg.MODEL.BACKBONE.PRETRAINED_STEM_WEIGHTS))
     [*self.model.backbone.modules()][1].layer1.load_state_dict(
         torch.load(cfg.MODEL.BACKBONE.PRETRAINED_LAYER1_WEIGHTS))
     self.train_loader = make_data_loader(cfg, is_train=True)
     self.val_loader = make_data_loader(cfg, is_train=False)[0]
     remove_empty_target(self.val_loader.dataset)
     self.optimizer = make_optimizer(cfg, self.model)
     self.writer = SummaryWriter(cfg.WRITER_DIR)
     self.predictor = Predictor(cfg,
                                self.model,
                                confidence_threshold=cfg.SOLVER.CONF_THRES,
                                min_image_size=cfg.TEST.MIN_IMG_SIZE)
     self.predictor.model.roi_heads.box.post_processor.detections_per_img = 20
     self.step = 0
     self.milestones = cfg.SOLVER.STEPS
     self.workspace = Path(cfg.WORKSPACE)
     # number of optimizer steps per epoch, used to derive all logging intervals
     iters_per_epoch = len(self.train_loader.dataset) // cfg.SOLVER.IMS_PER_BATCH
     self.board_loss_every = iters_per_epoch // cfg.SOLVER.BOARD_LOSS_INTERVAL
     self.evaluate_every = iters_per_epoch // cfg.SOLVER.EVALUATE_INTERVAL
     self.save_every = iters_per_epoch // cfg.SOLVER.SAVE_INTERVAL
     self.board_pred_image_every = iters_per_epoch // cfg.SOLVER.BOARD_IMAGE_INTERVAL
     self.inference_every = iters_per_epoch // cfg.SOLVER.INFERENCE_INTERVAL
Example 8
    def __init__(self, cfg, logger, writer):
        self.cfg, self.logger, self.writer = cfg, logger, writer
        # Define dataloader
        self.tng_dataloader, self.val_dataloader, self.num_classes, self.num_query = get_dataloader(cfg)
        # networks
        self.model = build_model(cfg, self.num_classes)
        self.base_type = self.model.base_type
        # loss function

        if cfg.SOLVER.LABEL_SMOOTH:
            self.ce_loss = CrossEntropyLabelSmooth(self.num_classes)
        else:
            self.ce_loss = nn.CrossEntropyLoss()
        self.triplet = TripletLoss(cfg.SOLVER.MARGIN)
        self.aligned_triplet = TripletLossAlignedReID(margin=cfg.SOLVER.MARGIN)
        self.of_penalty = OFPenalty(beta=1e-6, penalty_position=['intermediate'])
        # optimizer and scheduler
        self.opt = make_optimizer(self.cfg, self.model)
        self.lr_sched = make_lr_scheduler(self.cfg, self.opt)

        self._construct()
Example 9
    def __init__(self, cfg, logger, writer):
        self.cfg, self.logger, self.writer = cfg, logger, writer
        # Define dataloader
        self.tng_dataloader, self.val_dataloader, self.num_classes, self.num_query = get_dataloader(
            cfg)
        logger.info('num_classes ' + str(self.num_classes))
        # networks
        self.model = build_model(cfg, self.num_classes)
        #self.base_type = self.model.base_type
        # loss function

        #if cfg.SOLVER.LABEL_SMOOTH:
        self.ce_loss = CrossEntropyLabelSmooth(self.num_classes)
        #else:
        #    self.ce_loss = nn.CrossEntropyLoss()
        self.triplet = TripletLoss(cfg.SOLVER.MARGIN)

        # optimizer and scheduler
        self.opt = make_optimizer(self.cfg, self.model)
        self.lr_sched = make_lr_scheduler(self.cfg, self.opt)

        self._construct()
Example 10
    def __init__(self, cfg, logger, writer):
        self.cfg, self.logger, self.writer = cfg, logger, writer
        # Define dataloader
        self.tng_dataloader, self.val_dataloader_collection, self.num_classes, self.num_query_len_collection = get_dataloader_mask(
            cfg)
        # networks
        self.use_part_erasing = False
        self.num_parts = cfg.MODEL.NUM_PARTS
        self.model = build_model_selfgcn(cfg, self.num_classes)
        self.adj = torch.from_numpy(coarse_adj_npy).float()
        # loss function
        self.ce_loss = nn.CrossEntropyLoss()
        self.triplet = TripletLoss(cfg.SOLVER.MARGIN)
        self.mse_loss = nn.MSELoss()
        # optimizer and scheduler
        self.opt = make_optimizer(self.cfg, self.model)
        self.lr_sched = make_lr_scheduler(self.cfg, self.opt)

        self.loss_weight = [1.0, 1.0, 0.5, 0.5, 0.5, 0.5, 0.4]
        self.logger.info(
            f"Loss weights: {self.loss_weight}, use_pe: {self.use_part_erasing}, use_bnfeat: {True}"
        )
        self._construct()
Example 11
def train(cfg, local_rank, distributed):
    logger = logging.getLogger(cfg.NAME)
    # build model
    model = build_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    # build solver
    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {"iteration": 0}

    save_dir = os.path.join(cfg.CHECKPOINTER.DIR, cfg.CHECKPOINTER.NAME)

    save_to_disk = get_rank() == 0
    checkpointer = Checkpointer(
        model=model, optimizer=optimizer, scheduler=scheduler,
        save_dir=save_dir, save_to_disk=save_to_disk, logger=logger
    )
    extra_checkpoint_data = checkpointer.load(cfg.CHECKPOINTER.LOAD_NAME)
    arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    evaluate = cfg.SOLVER.EVALUATE
    if evaluate:
        synchronize()
        data_loader_val = make_data_loader(cfg, is_train=False, is_distributed=distributed, is_for_period=True)
        synchronize()
    else:
        data_loader_val = None

    if cfg.SUMMARY_WRITER and save_to_disk:
        # save_to_disk and save_dir were already computed above
        summary_writer = make_summary_writer(cfg.SUMMARY_WRITER, save_dir, model_name=cfg.MODEL.NAME)
    else:
        summary_writer = None

    do_train(
        cfg,
        model,
        data_loader,
        data_loader_val,
        optimizer,
        scheduler,
        checkpointer,
        device,
        arguments,
        summary_writer
    )

    return model
Example 12
 def test_optimizer(self):
     model = build_model(cfg)
     optimizer, lr_schedule = make_optimizer(cfg, model)
     from IPython import embed
     embed()
Example 13
def train(cfg, args):
    train_set = DatasetCatalog.get(cfg.DATASETS.TRAIN, args)
    val_set = DatasetCatalog.get(cfg.DATASETS.VAL, args)
    train_loader = DataLoader(train_set,
                              cfg.SOLVER.IMS_PER_BATCH,
                              num_workers=cfg.DATALOADER.NUM_WORKERS,
                              shuffle=True)
    val_loader = DataLoader(val_set,
                            cfg.SOLVER.IMS_PER_BATCH,
                            num_workers=cfg.DATALOADER.NUM_WORKERS,
                            shuffle=True)

    gpu_ids = [_ for _ in range(torch.cuda.device_count())]
    model = build_model(cfg)
    model.to("cuda")
    model = torch.nn.parallel.DataParallel(
        model, gpu_ids) if not args.debug else model

    logger = logging.getLogger("train_logger")
    logger.info("Start training")
    train_metrics = MetricLogger(delimiter="  ")
    max_iter = cfg.SOLVER.MAX_ITER
    output_dir = cfg.OUTPUT_DIR

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)
    checkpointer = Checkpointer(model, optimizer, scheduler, output_dir,
                                logger)
    start_iteration = checkpointer.load() if not args.debug else 0

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    validation_period = cfg.SOLVER.VALIDATION_PERIOD
    summary_writer = SummaryWriter(log_dir=os.path.join(output_dir, "summary"))
    visualizer = train_set.visualizer(cfg.VISUALIZATION)(summary_writer)

    model.train()
    start_training_time = time.time()
    last_batch_time = time.time()

    for iteration, inputs in enumerate(cycle(train_loader), start_iteration):
        data_time = time.time() - last_batch_time
        iteration = iteration + 1

        inputs = to_cuda(inputs)
        outputs = model(inputs)

        loss_dict = gather_loss_dict(outputs)
        loss = loss_dict["loss"]
        train_metrics.update(**loss_dict)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # step the LR scheduler after the optimizer update (PyTorch >= 1.1 ordering)
        scheduler.step()

        batch_time = time.time() - last_batch_time
        last_batch_time = time.time()
        train_metrics.update(time=batch_time, data=data_time)

        eta_seconds = train_metrics.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                train_metrics.delimiter.join([
                    "eta: {eta}", "iter: {iter}", "{meters}", "lr: {lr:.6f}",
                    "max mem: {memory:.0f}"
                ]).format(eta=eta_string,
                          iter=iteration,
                          meters=str(train_metrics),
                          lr=optimizer.param_groups[0]["lr"],
                          memory=torch.cuda.max_memory_allocated() / 1024.0 /
                          1024.0))
            summary_writer.add_scalars("train", train_metrics.mean, iteration)

        if iteration % 100 == 0:
            visualizer.visualize(inputs, outputs, iteration)

        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration))

        if iteration % validation_period == 0:
            with torch.no_grad():
                val_metrics = MetricLogger(delimiter="  ")
                for i, inputs in enumerate(val_loader):
                    data_time = time.time() - last_batch_time

                    inputs = to_cuda(inputs)
                    outputs = model(inputs)

                    loss_dict = gather_loss_dict(outputs)
                    val_metrics.update(**loss_dict)

                    batch_time = time.time() - last_batch_time
                    last_batch_time = time.time()
                    val_metrics.update(time=batch_time, data=data_time)

                    if i % 20 == 0 or i == cfg.SOLVER.VALIDATION_LIMIT:
                        logger.info(
                            val_metrics.delimiter.join([
                                "VALIDATION", "eta: {eta}", "iter: {iter}",
                                "{meters}"
                            ]).format(eta=eta_string,
                                      iter=iteration,
                                      meters=str(val_metrics)))

                    if i == cfg.SOLVER.VALIDATION_LIMIT:
                        summary_writer.add_scalars("val", val_metrics.mean,
                                                   iteration)
                        break
        if iteration == max_iter:
            break

    checkpointer.save("model_{:07d}".format(max_iter))
    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
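A note on the loop driver above: Example 13 pulls batches from cycle(train_loader) to get an endless stream for its iteration-based schedule. If cycle is itertools.cycle, it saves and replays the batches of the first pass, so shuffling and data augmentation stop varying after one epoch; iteration-based loops usually use a small re-iterating generator instead, sketched here under that assumption.

def cycle(loader):
    # Re-create the DataLoader iterator on every pass so shuffling and
    # augmentation are re-applied each epoch.
    while True:
        for batch in loader:
            yield batch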
Example 14
 def configure_optimizers(self):
     opt_fns = make_optimizer(self.cfg, self.model)
     lr_sched = make_lr_scheduler(self.cfg, opt_fns)
     return [opt_fns], [lr_sched]
 def test_optimizer(self):
     model = build_model(cfg)
     optimizer = make_optimizer(cfg, model)
     from IPython import embed
     embed()