Example #1
                        num_workers=4)

# ------------ preparation ------------
if exp_cfg['model'] == "scnn":
    net = SCNN(resize_shape, pretrained=True)
elif exp_cfg['model'] == "enet_sad":
    net = ENet_SAD(resize_shape, sad=True, dataset=dataset_name)
else:
    raise ValueError(
        "Unknown model: 'model' in 'cfg.json' must be 'scnn' or 'enet_sad'."
    )

net = net.to(device)
net = torch.nn.DataParallel(net)  # replicate the model across all visible GPUs

optimizer = optim.SGD(net.parameters(), **exp_cfg['optim'])
lr_scheduler = PolyLR(optimizer, 0.9, **exp_cfg['lr_scheduler'])
best_val_loss = 1e6


def train(epoch):
    print("Train Epoch: {}".format(epoch))
    net.train()
    train_loss = 0
    train_loss_seg = 0
    train_loss_exist = 0
    progressbar = tqdm(range(len(train_loader)))

    for batch_idx, sample in enumerate(train_loader):
        img = sample['img'].to(device)
        segLabel = sample['segLabel'].to(device)
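PolyLR is imported from elsewhere in these repositories and never shown in the excerpts. Below is a minimal sketch of a polynomial-decay scheduler consistent with the PolyLR(optimizer, 0.9, **exp_cfg['lr_scheduler']) calls above; the max_iter keyword is an assumption and the real class may differ.

# Hypothetical sketch, not the repository's actual PolyLR:
# lr decays as base_lr * (1 - step / max_iter) ** power.
from torch.optim.lr_scheduler import _LRScheduler

class PolyLR(_LRScheduler):
    def __init__(self, optimizer, power, max_iter, last_epoch=-1):
        self.power = power
        self.max_iter = max_iter
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        factor = (1 - self.last_epoch / self.max_iter) ** self.power
        return [base_lr * factor for base_lr in self.base_lrs]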
Example #2
                        collate_fn=dataset.collate,
                        num_workers=4)

# ------------ preparation ------------
# Fall back to 5 segmentation classes when the dataset does not define its own.
seg_classes = getattr(dataset, 'seg_classes', 5)
net = SCNN(resize_shape,
           pretrained=True,
           seg_classes=seg_classes,
           weights=Dataset_Type.get_weights(
               exp_cfg['dataset']['other']['seg_mode']))
net = net.to(device)
#net = torch.nn.DataParallel(net)

optimizer = optim.Adam(net.parameters(), **exp_cfg['optim'])
lr_scheduler = PolyLR(optimizer, 0.9, **exp_cfg['lr_scheduler'])
best_val_loss = 1e6


def train(epoch):
    print("Train Epoch: {}".format(epoch))
    net.train()
    train_loss = 0
    train_loss_seg = 0
    train_loss_exist = 0
    progressbar = tqdm(range(len(train_loader)))

    for batch_idx, sample in enumerate(train_loader):
        img = sample['img'].to(device)
        segLabel = sample['segLabel'].to(device)
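Example #2 passes per-class weights (Dataset_Type.get_weights(...)) into the model. Here is a self-contained sketch of the usual way such weights are consumed, i.e. a weighted cross-entropy over the segmentation logits; the weight values and tensor shapes are illustrative, not outputs of the real get_weights().

import torch
import torch.nn as nn

# Illustrative: down-weight the dominant background class.
weights = torch.tensor([0.4, 1.0, 1.0, 1.0, 1.0])  # seg_classes = 5 (assumed)
criterion = nn.CrossEntropyLoss(weight=weights)

logits = torch.randn(2, 5, 288, 800)               # (batch, classes, H, W)
target = torch.randint(0, 5, (2, 288, 800))        # per-pixel class indices
loss = criterion(logits, target)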
Example #3
net = SCNN(resize_shape, pretrained=True)
lr_scaler = 1
if torch.cuda.is_available():
    net.cuda()
    # Horovod: Scale learning rate as per number of devices
    if hvd.nccl_built():
        lr_scaler = hvd.local_size()

net = torch.nn.DataParallel(net)
lr = exp_cfg['optim']['lr']
momentum = exp_cfg['optim']['momentum']
weight_decay = exp_cfg['optim']['weight_decay']
nesterov = exp_cfg['optim']['nesterov']

# Horovod: scale learning rate by lr_scaler.
optimizer = optim.SGD(net.parameters(),
                      lr=lr * lr_scaler,
                      momentum=momentum,
                      weight_decay=weight_decay,
                      nesterov=nesterov)

# Horovod: broadcast parameters & optimizer state.
hvd.broadcast_parameters(net.state_dict(), root_rank=0)
hvd.broadcast_optimizer_state(optimizer, root_rank=0)

# Horovod: (optional) compression algorithm.
#compression = hvd.Compression.fp16

# Horovod: wrap optimizer with DistributedOptimizer.
gradient_predivide_factor = 1.0
optimizer = hvd.DistributedOptimizer(
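The excerpt cuts off inside the DistributedOptimizer call. Below is a hedged sketch of how the wrapping typically completes, following Horovod's documented signature; the argument choices are illustrative rather than recovered from the original.

# Sketch only; names reuse the variables defined in the excerpt above.
compression = hvd.Compression.none  # or hvd.Compression.fp16, per the comment above
optimizer = hvd.DistributedOptimizer(
    optimizer,
    named_parameters=net.named_parameters(),
    compression=compression,
    gradient_predivide_factor=gradient_predivide_factor)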
Example #4
val_bdd100k = BDDDataset(image_path=bdd100k_val_img_path,
                         drivable_path=bdd100k_val_dl_path)
val_bdd100k_dataset_loader = DataLoader(dataset=val_bdd100k, **params)

# Declare model & optimizers
net = SCNN(resize_shape, pretrained=True)
net = net.to(device)
#torch.distributed.init_process_group("gloo", rank=rank, world_size=world_size)
#torch.cuda.set_device()
#net = torch.nn.parallel.DistributedDataParallel(net)
#net = torch.nn.DataParallel(net)
#
#net.eval()
tensorboard = SummaryWriter(exp_dir + "tb/")

optimizer = optim.SGD(net.parameters(), **optim_set)
lr_scheduler = PolyLR(optimizer, 0.9, **lr_set)
best_val_loss = 1000


#@profile
def train(epoch):
    print("Train Epoch: {}".format(epoch))
    net.train()
    train_loss = 0
    train_loss_seg = 0
    ##train_loss_exist = 0
    epoch_accuracy = 0

    progressbar = tqdm(range(len(train_bdd100k_dataset_loader)))
    #Training loop
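    # ---- Hedged sketch of how such a loop typically continues; this is
    # ---- not the original code, and the model's return signature below
    # ---- (seg_pred, loss_seg) is an assumption. ----
    for batch_idx, sample in enumerate(train_bdd100k_dataset_loader):
        img = sample['img'].to(device)
        segLabel = sample['segLabel'].to(device)

        optimizer.zero_grad()
        seg_pred, loss_seg = net(img, segLabel)  # assumed return values
        loss_seg.backward()
        optimizer.step()

        train_loss += loss_seg.item()
        progressbar.update(1)

    progressbar.close()
    lr_scheduler.step()
    tensorboard.add_scalar("train/loss", train_loss, epoch)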