def train():
    criterion = FocalLoss(NUM_CLASS, alpha=weights_t)
    criterion.to(device)

    def compute_loss(x, label):
        loss = criterion(x, label)
        prec = (x.argmax(1) == label).float().mean()
        return loss, prec

    print('train on:', device)
    model = Cnn().to(device)
    optim = torch.optim.Adam(model.parameters(), lr=1e-3)
    model.train()
    step = 0
    prec = 0
    target_step = 1200
    last_time = time.monotonic()
    is_saved = False
    best = 999
    while step < target_step or not is_saved:
        images_t, labels_t = get_data(img_files, item_id_map, circle_map,
                                      img_map)
        optim.zero_grad()
        score = model(images_t)
        loss, prec = compute_loss(score, labels_t)
        loss.backward()
        optim.step()
        if step < 10 or step % 50 == 0:
            print(step, loss.item(), prec.item(), time.monotonic() - last_time)
            last_time = time.monotonic()
        step += 1
        if step > target_step - 300 and best > loss.item():
            model.eval()
            if test(model):
                best = loss.item()
                print(f'save best {best}')
                model.train()
                torch.save(model.state_dict(), './model.pth')
                torch.onnx.export(model,
                                  torch.rand((1, 3, 60, 60)).to(device),
                                  'ark_material.onnx')
                is_saved = True
            else:
                model.train()

    from dl_data import request_get
    request_get(
        'https://purge.jsdelivr.net/gh/triwinds/arknights-ml@latest/inventory/index_itemid_relation.json',
        True)
    request_get(
        'https://purge.jsdelivr.net/gh/triwinds/arknights-ml@latest/inventory/ark_material.onnx',
        True)
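# Note: every snippet on this page assumes a FocalLoss class imported from elsewhere,
# and its constructor varies between projects (class count, per-class alpha weights,
# gamma, reduction). As a rough, illustrative sketch only (not the implementation any
# of these repositories actually uses), a minimal multi-class, logit-based variant
# following FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t) could look like this:
import torch
import torch.nn as nn
import torch.nn.functional as F


class FocalLossSketch(nn.Module):
    def __init__(self, gamma=2.0, alpha=None):
        super().__init__()
        self.gamma = gamma
        # alpha: optional sequence or tensor of per-class weights
        self.alpha = None if alpha is None else torch.as_tensor(alpha, dtype=torch.float)

    def forward(self, logits, target):
        # logits: (N, C) raw scores, target: (N,) integer class indices
        target = target.long()
        log_prob = F.log_softmax(logits, dim=1)
        log_pt = log_prob.gather(1, target.unsqueeze(1)).squeeze(1)  # log p_t
        pt = log_pt.exp()
        loss = -((1.0 - pt) ** self.gamma) * log_pt
        if self.alpha is not None:
            loss = loss * self.alpha.to(logits.device)[target]
        return loss.mean()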
Example #2
def train_model(epochs,
                model,
                dl_train,
                device,
                dl_val=None,
                path=model_path,
                file_name='',
                print_freq=50):
    """ Trains the model for several epochs and saves the model that has best strong confidence predictions
    """

    optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
    total_steps = len(dl_train) * epochs
    evaluate = dl_val is not None

    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0,
                                                num_training_steps=total_steps)
    loss_fn = FocalLoss(gamma=4,
                        class_num=2).to(device)  #FocalLoss().to(device)
    best_correct_ratio = 0
    correct_ratio = 0
    if evaluate:
        _, correct_ratio = get_predictions(model,
                                           dl_val,
                                           device,
                                           n_examples=50,
                                           force_n_examples=True,
                                           use_targets=True)
        eval_model(0, model, dl_val, loss_fn, device)

    epoch = 0
    for epoch in range(epochs):
        print(f'Epoch {epoch + 1}/{epochs}')
        print('-' * 10)
        train_acc, train_loss = train_epoch(epoch,
                                            model,
                                            dl_train,
                                            loss_fn,
                                            optimizer,
                                            device,
                                            scheduler,
                                            print_freq=print_freq)
        if evaluate:
            val_acc, val_loss, f1 = eval_model(epoch, model, dl_val, loss_fn,
                                               device)
            _, correct_ratio = get_predictions(model,
                                               dl_val,
                                               device,
                                               n_examples=50,
                                               force_n_examples=True,
                                               use_targets=True)

        if correct_ratio > best_correct_ratio and file_name != '':
            print('Save. Epoch: ', epoch + 1)
            torch.save(model.state_dict(), path + '/' + file_name)
            best_correct_ratio = correct_ratio
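# Hypothetical call to train_model, assuming the model, dataloaders and device objects
# are already built; the keyword names come straight from the signature above:
#
#     train_model(epochs=3, model=model, dl_train=dl_train, device=device,
#                 dl_val=dl_val, file_name='best_model.bin', print_freq=50)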
Example #3
def train_one_epoch(args, model, optimizer, train_loader, logger, model_saver):
    # criterion = nn.BCEWithLogitsLoss()
    # criterion = nn.BCELoss()
    criterion = FocalLoss()
    model.train()
    device = get_device(args)
    for step, (imgs, targets) in enumerate(train_loader, start=1):
        t1 = time.perf_counter()
        optimizer.zero_grad()
        targets_one_hot = label_to_one_hot(targets, n_class=args.n_class)

        # test the one-hot transform
        # targets_one_hot_argmax = targets_one_hot.argmax(dim=1, keepdim=True)
        # print(f'targets_one_hot_argmax:{targets_one_hot_argmax}\ntargets:{targets}')
        # print(f'check:{torch.eq(targets, targets_one_hot_argmax)}')

        imgs, targets_one_hot = imgs.to(device), targets_one_hot.to(device)
        outs = model(imgs).sigmoid()
        loss = criterion(input=outs, target=targets_one_hot)
        loss.backward()
        optimizer.step()
        t2 = time.perf_counter()
        print(
            f'step:{step} [{step}/{len(train_loader)}] '
            f'| loss:{loss.item():.8f} | lr:{get_lr(optimizer)} | time:{t2 - t1}'
        )
        logger.log(key='train_loss', data=loss.item())

        # save the model, optimizer every args.save_steps
        if step % args.save_steps == 0:
            logger.visualize(key='train_loss', range=(-1000, -1))
            logger.save_log()
            model_saver.save(name=args.model_name, model=model)
            model_saver.save(name=args.optimizer, model=optimizer)
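# The loop above relies on a label_to_one_hot helper that is not shown here. A minimal
# sketch built on torch.nn.functional.one_hot (the real helper may handle extra cases,
# e.g. ignore indices or spatial label maps) could be:
import torch
import torch.nn.functional as F


def label_to_one_hot_sketch(targets: torch.Tensor, n_class: int) -> torch.Tensor:
    # targets: (N,) integer class indices -> (N, n_class) float one-hot targets
    return F.one_hot(targets.long(), num_classes=n_class).float()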
Example #4
 def __init__(self, num_classes=80):
     super(RetinaNet, self).__init__()
     self.fpn = FPN50()
     self.num_classes = num_classes
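     # num_anchors is assumed to be defined elsewhere on the class (e.g. as a class attribute)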
     self.loc_head = self._make_head(self.num_anchors * 4)
     self.cls_head = self._make_head(self.num_anchors * self.num_classes)
     self.focal_loss = FocalLoss()
Example #5
    def __init__(self,
                 picker: Picker,
                 results_path: Path,
                 train_set: SeisDataset,
                 valid_set: SeisDataset,
                 test_set: SeisDataset,
                 device: torch.device,
                 batch_size: int,
                 lr: float,
                 num_workers: int,
                 freq_valid: int,
                 visual: Dict[str, List[int]],
                 dt_ms: float,
                 height_model: int,
                 width_model: int,
                 stopper: Stopper,
                 weights: torch.Tensor):

        self._picker = picker
        self._results_path = results_path
        self._train_set = train_set
        self._valid_set = valid_set
        self._test_set = test_set
        self._device = device
        self._batch_size = batch_size
        self._lr = lr
        self._num_workers = num_workers
        self._freq_valid = freq_valid

        self._visual = visual
        self._dt_ms = dt_ms
        self._height_model = height_model
        self._width_model = width_model
        self._stopper = stopper
        self._weights = weights

        # self._criterion = nn.CrossEntropyLoss(weight=self._weights).to(self._device)
        self._criterion = FocalLoss(alpha=self._weights, gamma=2)

        self._optimizer = torch.optim.Adam(picker.parameters(), lr=self._lr)

        self._net_path, self._tensorboard_path = self._results_path / 'net', self._results_path / 'tensorboard'

        for folder in [self._net_path, self._tensorboard_path]:
            folder.mkdir(exist_ok=True, parents=True)

        self._writer_tb = SummaryWriter(log_dir=str(self._tensorboard_path), flush_secs=20)
        self._picker.to(self._device)

        self._num_batch = 0

        self._correct_visual()
        self._freq_valid = min((self._freq_valid, len(self._train_set) // self._batch_size + 1))
Example #6
 def __init__(self, cfg):
     super(type(self), self).__init__()
     # params and flags
     self.loss_lambda = cfg.loss_lambda
     self.im_size = cfg.im_size
     self.map_size = (down2n(cfg.im_size[0],cfg.conv_npool[-1]),down2n(cfg.im_size[1],cfg.conv_npool[-1]))
     self.bbox_thres = cfg.bbox_thres
     self.head_oproi = cfg.head_oproi
     # loss objects
     self.cl_loss = FocalLoss(gamma=cfg.loss_gamma, alpha=cfg.loss_alpha, size_average=True)
     self.cf_loss = nn.CrossEntropyLoss()
     self.op_loss = nn.BCEWithLogitsLoss()
Example #7
    def __init__(self, pp):
        super(mirDNN, self).__init__()
        self.device = pp.device
        self.nll_correction = -0.5 + mt.log(2 * mt.exp(0.5))

        self.embedding = NucleotideEmbedding()
        layers = []
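        # in_channels is assumed to be defined elsewhere (e.g. the embedding output width)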
        layers.append(
            nn.Conv1d(in_channels,
                      pp.width,
                      kernel_size=pp.kernel_size,
                      padding=int(pp.kernel_size / 2)))
        seq_len = pp.seq_len
        while seq_len > 10:
            for i in range(pp.n_resnets):
                layers.append(
                    ResNet(pp.width,
                           nfilters=[pp.width, pp.width],
                           ksizes=[pp.kernel_size, pp.kernel_size]))
            layers.append(nn.MaxPool1d(2))
            seq_len = int(seq_len / 2)
        layers.append(nn.ELU())
        layers.append(nn.BatchNorm1d(pp.width))
        self.conv_layers = nn.Sequential(*layers)
        self.conv_out_dim = pp.width * seq_len

        self.ivar_layers = nn.BatchNorm1d(1)

        in_dim = self.conv_out_dim + 1
        layers = []
        layers.append(nn.Linear(in_dim, 32))
        layers.append(nn.ELU())
        layers.append(nn.BatchNorm1d(32))
        layers.append(nn.Linear(32, 1))
        layers.append(nn.Sigmoid())
        self.fcon_layers = nn.Sequential(*layers)

        if pp.focal_loss:
            self.loss_function = FocalLoss()
        else:
            self.loss_function = nn.BCELoss()
        self.to(device=self.device)
        self.optimizer = RAdam(self.parameters(), lr=5e-3, weight_decay=1e-5)
        self.lr_scheduler = ReduceLROnPlateau(self.optimizer,
                                              mode='max',
                                              factor=0.5,
                                              patience=100,
                                              min_lr=1e-6,
                                              eps=1e-9)
 def transfer_model_train(data_builder, json_file_name, weights_file_name,
                          metrics_file_name):
     """ Run with pretrained model """
     transfer_model = MultiModal(data_builder)
     transfer_model.compile_json_model(json_model=json_file_name,
                                       weights=weights_file_name)
     # transfer_model.compile_multi_modal_network(model_summary=False, save_img=True, save_json=True)
     transfer_model.get_label_ratios()
     focal_loss = FocalLoss(alpha=transfer_model.label_ratios,
                            class_proportions=True)
     transfer_model.train_model(epochs=10,
                                loss_function=focal_loss,
                                learning_rate=0.00001,
                                metrics=['loss', 'F1'],
                                predict_after_epoch=True,
                                save_weights=True,
                                save_metrics=True,
                                assert_weight_update=True,
                                weights_file_name=weights_file_name,
                                metrics_file_name=metrics_file_name)
    def __init__(self, args):
        super(Trainer, self).__init__()
        self.epoch = args.epoch
        self.batch_size = args.batch_size
        self.data_dir = args.data_dir

        self.save_dir = args.save_dir
        self.result_dir = args.result_dir
        self.log_dir = args.log_dir
        self.gpu_mode = args.gpu_mode
        self.verbose = args.verbose

        if args.model == 'fcn16s':
            self.model = FCN16s()
        elif args.model == 'fcn32s':
            self.model = FCN32s()
        elif args.model == 'fcn8s':
            self.model = FCN8s()
        elif args.model == 'pspnet':
            self.model = PSPnet()
        else:
            print("No this model type")
            exit(-1)
        if self.gpu_mode:
            self.model = self.model.cuda()
        self.parameter = self.model.parameters()
        self.optimizer = optim.Adam(self.parameter, lr=args.learning_rate)
        self.scheduler = optim.lr_scheduler.ExponentialLR(self.optimizer,
                                                          gamma=0.5)

        self.train_dataloader = get_data_loader(self.data_dir,
                                                self.batch_size,
                                                split='train')
        self.test_dataloader = get_data_loader(self.data_dir, 1, split='val')

        # experiment_id = args.model + time.strftime('%m%d%H%m')
        # self.writer = SummaryWriter(log_dir=self.log_dir + '/tboard_' + experiment_id)
        self.loss = FocalLoss(gamma=1.25)

        if args.pretrain != '':
            self._load_pretrain(args.pretrain)
Example #10
def LossFunction(output, label, transform, regularization_weight=1e-3):
    b, n, c = output.shape
    output = output.view(-1, c)
    label = label.view(b * n)
    #weight = torch.from_numpy(np.array([0.06,1,1,1,1,1,1,1])).float().cuda()
    #criterion1 = nn.CrossEntropyLoss(weight=weight)

    output = output.float()
    label = label.long()
    weight = [0.06, 1, 1, 0.99, 1, 1, 1, 1]
    criterion1 = FocalLoss(gamma=2, alpha=weight)
    classify_loss = criterion1(output, label)

    batch_size, k, _ = transform.shape
    matrix_difference = torch.bmm(transform, transform.permute(0, 2, 1))
    identity = torch.from_numpy(np.eye(k).astype(np.float32)).repeat(
        batch_size, 1, 1).cuda()
    identity = Variable(identity).cuda()
    criterion2 = nn.MSELoss()
    matrix_difference_loss = criterion2(matrix_difference, identity)
    return classify_loss + matrix_difference_loss * regularization_weight
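# Hypothetical call, with shapes assumed from the function body above (not taken from
# the original repository):
#   output:    (B, N, C) per-point class scores,   e.g. torch.randn(8, 1024, 8)
#   label:     (B, N)    integer point labels,     e.g. torch.randint(0, 8, (8, 1024))
#   transform: (B, k, k) feature-transform matrix, e.g. torch.randn(8, 64, 64)
# loss = LossFunction(output.cuda(), label.cuda(), transform.cuda())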
 def train_new_model(data_builder, json_file_name, weights_file_name,
                     metrics_file_name):
     """ build, compile, train, test, and evaluate new model """
     model = MultiModal(data_builder)
     model.compile_multi_modal_network(model_summary=False,
                                       save_img=False,
                                       save_json=True,
                                       json_file_name=json_file_name)
     model.get_label_ratios()
     focal_loss = FocalLoss(alpha=model.label_ratios,
                            class_proportions=True)
     model.train_model(epochs=10,
                       loss_function=focal_loss,
                       learning_rate=0.0001,
                       metrics=['loss', 'F1'],
                       predict_after_epoch=True,
                       save_weights=True,
                       weights_file_name=weights_file_name,
                       save_metrics=True,
                       metrics_file_name=metrics_file_name,
                       assert_weight_update=True)
     model.predict_model()
    def train_step(self, net, batch, optimizer, device):
        net.train() # train mode

        imgs, true = batch # batch is NHWC
        imgs = imgs.permute(0, 3, 1, 2) # to NCHW

        # push data to GPUs and convert to float32
        imgs = imgs.to(device).float()
        true = true.to(device).long()# not one-hot

        # -----------------------------------------------------------
        net.zero_grad() # not rnn so not accumulate

        logit = net(imgs) # forward
        prob = F.softmax(logit, dim=1)

        # has built-in log softmax, so it accepts logits
        # true = torch.squeeze(true)
        focal_loss = FocalLoss(gamma=0.5)

        loss = focal_loss(logit, true)

        prob = prob.permute(0, 2, 3, 1) # to NHWC
        pred = torch.argmax(prob, dim=-1)

        # with ignore index at 0
        foc = (true > 0).type(torch.float32)
        acc = (pred == true).type(torch.float32) * foc
        acc = torch.sum(acc) / torch.sum(foc)

        # gradient update
        loss.backward()
        optimizer.step()

        # -----------------------------------------------------------
        return dict(loss=loss.item(), 
                    acc=acc.item())
Example #13
def train(dataLoader, netmodel, optimizer, epoch, logger, exp_args):
    batch_time = AverageMeter('batch_time')
    data_time = AverageMeter('data_time')

    losses = AverageMeter('losses')
    losses_mask = AverageMeter('losses_mask')

    if exp_args.addEdge == True:
        losses_edge_ori = AverageMeter('losses_edge_ori')
        losses_edge = AverageMeter('losses_edge')

    if exp_args.stability == True:
        losses_mask_ori = AverageMeter('losses_mask_ori')
        losses_stability_mask = AverageMeter('losses_stability_mask')
        losses_stability_edge = AverageMeter('losses_stability_edge')

    netmodel.train()  # switch to train mode

    loss_Softmax = nn.CrossEntropyLoss(ignore_index=255)  # mask loss
    # in our experiments, focalloss is better than l2 loss
    loss_Focalloss = FocalLoss(gamma=2)  # boundary loss
    loss_l2 = nn.MSELoss()  # still used below for the L2 stability (consistency) terms
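    # loss_KL (used below for the consistency terms) is assumed to be defined at module level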

    end = time.time()
    for i, (input_ori, input, edge, mask) in enumerate(dataLoader):
        data_time.update(time.time() - end)
        input_ori_var = Variable(input_ori.cuda())
        input_var = Variable(input.cuda())
        edge_var = Variable(edge.cuda())
        mask_var = Variable(mask.cuda())

        if exp_args.addEdge == True:
            output_mask, output_edge = netmodel(input_var)
            loss_mask = loss_Softmax(output_mask, mask_var)
            losses_mask.update(loss_mask.data.item(), input.size(0))

            # loss_edge = loss_l2(output_edge, edge_var) * exp_args.edgeRatio
            loss_edge = loss_Focalloss(output_edge,
                                       edge_var) * exp_args.edgeRatio

            losses_edge.update(loss_edge.data.item(), input.size(0))

            # total loss
            loss = loss_mask + loss_edge

            if exp_args.stability == True:
                output_mask_ori, output_edge_ori = netmodel(input_ori_var)
                loss_mask_ori = loss_Softmax(output_mask_ori, mask_var)
                losses_mask_ori.update(loss_mask_ori.data.item(),
                                       input.size(0))

                # loss_edge_ori = loss_l2(output_edge_ori, edge_var) * exp_args.edgeRatio
                loss_edge_ori = loss_Focalloss(output_edge_ori,
                                               edge_var) * exp_args.edgeRatio
                losses_edge_ori.update(loss_edge_ori.data.item(),
                                       input.size(0))

                # in our experiments, kl loss is better than l2 loss
                if exp_args.use_kl == False:
                    # consistency constraint loss: L2 distance
                    loss_stability_mask = loss_l2(
                        output_mask,
                        Variable(output_mask_ori.data,
                                 requires_grad=False)) * exp_args.alpha
                    loss_stability_edge = loss_l2(
                        output_edge,
                        Variable(output_edge_ori.data, requires_grad=False)
                    ) * exp_args.alpha * exp_args.edgeRatio
                else:
                    # consistency constraint loss: KL distance (better than L2 distance)
                    loss_stability_mask = loss_KL(
                        output_mask,
                        Variable(output_mask_ori.data, requires_grad=False),
                        exp_args.temperature) * exp_args.alpha
                    loss_stability_edge = loss_KL(
                        output_edge,
                        Variable(output_edge_ori.data,
                                 requires_grad=False), exp_args.temperature
                    ) * exp_args.alpha * exp_args.edgeRatio

                losses_stability_mask.update(loss_stability_mask.data.item(),
                                             input.size(0))
                losses_stability_edge.update(loss_stability_edge.data.item(),
                                             input.size(0))

                # total loss
                # loss = loss_mask + loss_mask_ori + loss_edge + loss_edge_ori + loss_stability_mask + loss_stability_edge
                loss = loss_mask + loss_mask_ori + loss_stability_mask + loss_edge
        else:
            output_mask = netmodel(input_var)
            loss_mask = loss_Softmax(output_mask, mask_var)
            losses_mask.update(loss_mask.data.item(), input.size(0))
            # total loss: only include mask loss
            loss = loss_mask

            if exp_args.stability == True:
                output_mask_ori = netmodel(input_ori_var)
                loss_mask_ori = loss_Softmax(output_mask_ori, mask_var)
                losses_mask_ori.update(loss_mask_ori.data.item(),
                                       input.size(0))
                if exp_args.use_kl == False:
                    # consistency constraint loss: L2 distance
                    loss_stability_mask = loss_l2(
                        output_mask,
                        Variable(output_mask_ori.data,
                                 requires_grad=False)) * exp_args.alpha
                else:
                    # consistency constraint loss: KL distance (better than L2 distance)
                    loss_stability_mask = loss_KL(
                        output_mask,
                        Variable(output_mask_ori.data, requires_grad=False),
                        exp_args.temperature) * exp_args.alpha
                losses_stability_mask.update(loss_stability_mask.data.item(),
                                             input.size(0))

                # total loss
                loss = loss_mask + loss_mask_ori + loss_stability_mask

        losses.update(loss.data.item(), input.size(0))

        # compute gradient and do Adam step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.printfreq == 0:
            print(
                'Epoch: [{0}][{1}/{2}]\t'
                'Lr-deconv: [{3}]\t'
                'Lr-other: [{4}]\t'
                # 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                # 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                    epoch,
                    i,
                    len(dataLoader),
                    optimizer.param_groups[0]['lr'],
                    optimizer.param_groups[1]['lr'],
                    loss=losses))

        ## '===========> logger <==========='
        # (1) Log the scalar values
        if exp_args.addEdge == True and exp_args.stability == True:
            info = {  # batch_time.name: batch_time.val,
                # data_time.name: data_time.val,
                losses.name: losses.val,
                losses_mask_ori.name: losses_mask_ori.val,
                losses_mask.name: losses_mask.val,
                losses_edge_ori.name: losses_edge_ori.val,
                losses_edge.name: losses_edge.val,
                losses_stability_mask.name: losses_stability_mask.val,
                losses_stability_edge.name: losses_stability_edge.val
            }
        elif exp_args.addEdge == True and exp_args.stability == False:
            info = { # batch_time.name: batch_time.val,
                     # data_time.name: data_time.val,
                     losses.name: losses.val,
                     losses_mask.name: losses_mask.val,
                     losses_edge.name: losses_edge.val,
                   }
        elif exp_args.addEdge == False and exp_args.stability == True:
            info = { # batch_time.name: batch_time.val,
                     # data_time.name: data_time.val,
                     losses.name: losses.val,
                     losses_mask_ori.name: losses_mask_ori.val,
                     losses_mask.name: losses_mask.val,
                     losses_stability_mask.name: losses_stability_mask.val,
                   }
        elif exp_args.addEdge == False and exp_args.stability == False:
            info = { # batch_time.name: batch_time.val,
                     # data_time.name: data_time.val,
                     losses.name: losses.val,
                     losses_mask.name: losses_mask.val,
                   }
        for tag, value in info.items():
            logger.scalar_summary(tag, value, step=i)
        '''
        # (2) Log values and gradients of the parameters (histogram)
        for tag, value in netmodel.named_parameters():
            tag = tag.replace('.', '/')
            logger.histo_summary(tag, value.data.cpu().numpy(), step=i)
            if value.grad is None:
                continue
            logger.histo_summary(tag+'/grad', value.grad.cpu().data.numpy(), step=i)
            break
        '''

        # (3) Log the images
        if i % (args.printfreq) == 0:
            num = 2
            input_img = np.uint8((Anti_Normalize_Img(
                np.transpose(input.cpu().numpy()[0:num], (0, 2, 3, 1)),
                scale=exp_args.img_scale,
                mean=exp_args.img_mean,
                val=exp_args.img_val)))[:, :, :, :3][:, :, :, ::-1]

            if exp_args.video == True:
                input_prior = np.float32(
                    np.transpose(input.cpu().numpy()[0:num],
                                 (0, 2, 3, 1))[:, :, :, 3])

            input_mask = mask.cpu().numpy()[0:num]
            input_mask[input_mask == 255] = 0
            softmax = nn.Softmax(dim=1)
            prob = softmax(output_mask)
            masks_pred = np.transpose(prob.data.cpu().numpy()[0:num],
                                      (0, 2, 3, 1))[:, :, :, 1]

            info = {}
            info['input_img'] = input_img
            if exp_args.video == True:
                info['input_prior'] = input_prior * 255
            info['input_mask'] = input_mask * 255
            info['output_mask'] = masks_pred * 255

            if exp_args.addEdge == True:
                input_edge = edge.cpu().numpy()[0:num]
                edge_pred = np.transpose(output_edge.data.cpu().numpy()[0:num],
                                         (0, 2, 3, 1))[:, :, :, 0]

                if exp_args.stability == True:
                    input_img_ori = np.uint8((Anti_Normalize_Img(
                        np.transpose(input_ori.cpu().numpy()[0:num],
                                     (0, 2, 3, 1)),
                        scale=exp_args.img_scale,
                        mean=exp_args.img_mean,
                        val=exp_args.img_val)))[:, :, :, :3][:, :, :, ::-1]

                    prob_ori = softmax(output_mask_ori)
                    masks_pred_ori = np.transpose(
                        prob_ori.data.cpu().numpy()[0:num],
                        (0, 2, 3, 1))[:, :, :, 1]
                    edge_pred_ori = np.transpose(
                        output_edge_ori.data.cpu().numpy()[0:num],
                        (0, 2, 3, 1))[:, :, :, 0]

                    info['input_img_ori'] = input_img_ori
                    info['output_mask_ori'] = masks_pred_ori * 255

                    info['input_edge'] = input_edge * 255
                    info['output_edge'] = edge_pred * 255
                    info['output_edge_ori'] = edge_pred_ori * 255
                else:
                    info['input_edge'] = input_edge * 255
                    info['output_edge'] = edge_pred * 255
            else:
                if exp_args.stability == True:
                    input_img_ori = np.uint8((Anti_Normalize_Img(
                        np.transpose(input_ori.cpu().numpy()[0:num],
                                     (0, 2, 3, 1)),
                        scale=exp_args.img_scale,
                        mean=exp_args.img_mean,
                        val=exp_args.img_val)))[:, :, :, :3][:, :, :, ::-1]

                    prob_ori = softmax(output_mask_ori)
                    masks_pred_ori = np.transpose(
                        prob_ori.data.cpu().numpy()[0:num],
                        (0, 2, 3, 1))[:, :, :, 1]

                    info['input_img_ori'] = input_img_ori
                    info['output_mask_ori'] = masks_pred_ori * 255

            print(np.max(masks_pred), np.min(masks_pred))

            for tag, images in info.items():
                logger.image_summary(tag, images, step=i)

    pass
Example #14
def train_net(args):
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = float('-inf')
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        elif args.network == 'mobile':
            model = MobileNetV2()
        else:
            raise TypeError('network {} is not supported.'.format(
                args.network))

        # print(model)
        model = nn.DataParallel(model)
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)

        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                        lr=args.lr,
                                        momentum=args.mom,
                                        weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    logger = get_logger()

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    train_dataset = ArcFaceDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=4)

    scheduler = StepLR(optimizer, step_size=args.lr_step, gamma=0.1)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        # One epoch's training
        train_loss, train_acc = train(train_loader=train_loader,
                                      model=model,
                                      metric_fc=metric_fc,
                                      criterion=criterion,
                                      optimizer=optimizer,
                                      epoch=epoch,
                                      logger=logger)

        writer.add_scalar('model/train_loss', train_loss, epoch)
        writer.add_scalar('model/train_acc', train_acc, epoch)

        # One epoch's validation
        lfw_acc, threshold = lfw_test(model)
        writer.add_scalar('model/valid_acc', lfw_acc, epoch)
        writer.add_scalar('model/valid_thres', threshold, epoch)

        # Check if there was an improvement
        is_best = lfw_acc > best_acc
        best_acc = max(lfw_acc, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                        optimizer, best_acc, is_best)

        scheduler.step(epoch)
def train(model_name, outputDir):
    train_dataset = FurnitureDataset('train', transform=preprocess_with_augmentation)
    val_dataset = FurnitureDataset('val', transform=preprocess)
    training_data_loader = DataLoader(dataset=train_dataset, num_workers=12,
                                      batch_size=BATCH_SIZE,
                                      shuffle=True)
    validation_data_loader = DataLoader(dataset=val_dataset, num_workers=1,
                                        batch_size=BATCH_SIZE,
                                        shuffle=False)

    model = get_model(model_name)

    nb_learnable_params = sum(p.numel() for p in model.fresh_params())
    print('Number of learnable params: %s' % str(nb_learnable_params))

    # Use model.fresh_params() to train only the newly initialized weights
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2)

    if model_name.endswith("_focal"):
        print ("Using Focal loss instead of normal cross-entropy")
        criterion = FocalLoss(NB_CLASSES).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    min_loss = float("inf")
    max_acc = 0.0
    patience = 0
    for epoch in range(NUM_EPOCHS):
        print('Epoch: %d' % epoch)
        
        running_loss = RunningMean()
        running_error = RunningMean()
        running_accuracy = RunningMean()

        model.train()
        pbar = tqdm(training_data_loader, total=len(training_data_loader))
        for inputs, labels in pbar:
            batch_size = inputs.size(0)

            inputs = Variable(inputs)
            labels = Variable(labels)
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            _, preds = torch.max(outputs.data, dim=1)

            loss = criterion(outputs, labels)
            running_loss.update(loss.data[0], 1)
            running_error.update(torch.sum(preds != labels.data), batch_size)
            running_accuracy.update(torch.sum(preds == labels.data), batch_size)

            loss.backward()
            optimizer.step()

            pbar.set_description('%.5f %.3f %.3f' % (running_loss.value, running_accuracy.value, running_error.value))
        print('Epoch: %d | Running loss: %.5f | Running accuracy: %.3f | Running error: %.3f' % (epoch, running_loss.value, running_accuracy.value, running_error.value))

        lx, px = utils.predict(model, validation_data_loader, device)
        log_loss = criterion(Variable(px), Variable(lx))
        log_loss = log_loss.data[0]
        _, preds = torch.max(px, dim=1)
        accuracy = torch.mean((preds == lx).float())
        error = torch.mean((preds != lx).float())
        print('Validation loss: %.5f | Accuracy: %.3f | Error: %.3f' % (log_loss, accuracy, error))
        scheduler.step(log_loss)

        # Save model after each epoch
        torch.save(model.state_dict(), os.path.join(outputDir, 'weight_' + model_name + '.pth'))

        betterModelFound = False
        if log_loss < min_loss:
            torch.save(model.state_dict(), os.path.join(outputDir, 'best_val_loss_weight_' + model_name + '.pth'))
            print('Validation score improved from %.5f to %.5f. Model snapshot saved!' % (min_loss, log_loss))
            min_loss = log_loss
            patience = 0
            betterModelFound = True

        if accuracy > max_acc:
            torch.save(model.state_dict(), os.path.join(outputDir, 'best_val_acc_weight_' + model_name + '.pth'))
            print('Validation accuracy improved from %.5f to %.5f. Model snapshot saved!' % (max_acc, accuracy))
            max_acc = accuracy
            patience = 0
            betterModelFound = True

        if not betterModelFound:
            patience += 1
Example #16
def train_net(args):
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = 0
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        elif args.network == 'mobile':
            model = MobileNet(1.0)
        elif args.network == 'mr18':
            print("mr18")
            model = myResnet18()
        else:
            model = resnet_face18(args.use_se)
        model = nn.DataParallel(model)
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)

        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                        lr=args.lr,
                                        momentum=args.mom,
                                        weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    logger = get_logger()

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    train_dataset = ArcFaceDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True)

    scheduler = StepLR(optimizer, step_size=args.lr_step, gamma=0.1)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        scheduler.step()

        if args.full_log:
            lfw_acc, threshold = lfw_test(model)
            writer.add_scalar('LFW_Accuracy', lfw_acc, epoch)
            full_log(epoch)

        start = datetime.now()
        # One epoch's training
        train_loss, train_top5_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch,
                                            logger=logger,
                                            writer=writer)

        writer.add_scalar('Train_Loss', train_loss, epoch)
        writer.add_scalar('Train_Top5_Accuracy', train_top5_accs, epoch)

        end = datetime.now()
        delta = end - start
        print('{} seconds'.format(delta.seconds))

        # One epoch's validation
        lfw_acc, threshold = lfw_test(model)
        writer.add_scalar('LFW Accuracy', lfw_acc, epoch)

        # Check if there was an improvement
        is_best = lfw_acc > best_acc
        best_acc = max(lfw_acc, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                        optimizer, best_acc, is_best)
Example #17
def train_net(args):
    torch.manual_seed(7)
    np.random.seed(7)
    best_loss = 100000
    checkpoint = None
    start_epoch = 0
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r100':
            model = resnet101(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r18':
            model = resnet18(args)
        else:  # 'face'
            model = resnet50(args)
        optimizer = torch.optim.SGD(params=filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr,
                                    momentum=args.mom, weight_decay=args.weight_decay)
    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']

    # Move to GPU, if available
    model = model.to(device)

    # Loss function
    if args.focal_loss:
        age_criterion = FocalLoss(gamma=args.gamma).to(device)
        gender_criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        age_criterion = nn.CrossEntropyLoss().to(device)
        gender_criterion = nn.CrossEntropyLoss().to(device)

    criterion_info = (age_criterion, gender_criterion, args.age_weight)

    # Custom dataloaders
    train_dataset = AgeGenDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=workers,
                                               pin_memory=True)
    val_dataset = AgeGenDataset('valid')
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=workers,
                                             pin_memory=True)

    scheduler = StepLR(optimizer, step_size=args.lr_step, gamma=0.1)

    # Epochs
    for epoch in range(start_epoch, epochs):
        scheduler.step()

        # One epoch's training
        train_loss, train_gen_accs, train_age_mae = train(train_loader=train_loader,
                                                          model=model,
                                                          criterion_info=criterion_info,
                                                          optimizer=optimizer,
                                                          epoch=epoch)
        writer.add_scalar('Train Loss', train_loss, epoch)
        writer.add_scalar('Train Gender Accuracy', train_gen_accs, epoch)
        writer.add_scalar('Train Age MAE', train_age_mae, epoch)

        # One epoch's validation
        valid_loss, valid_gen_accs, valid_age_mae = validate(val_loader=val_loader,
                                                             model=model,
                                                             criterion_info=criterion_info)

        writer.add_scalar('Valid Loss', valid_loss, epoch)
        writer.add_scalar('Valid Gender Accuracy', valid_gen_accs, epoch)
        writer.add_scalar('Valid Age MAE', valid_age_mae, epoch)

        # Check if there was an improvement
        is_best = valid_loss < best_loss
        best_loss = min(valid_loss, best_loss)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, optimizer, best_loss, is_best)
Example #18
def train_net(args):
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = 0
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        elif args.network == 'mobile':
            model = MobileNet(1.0)
        else:
            model = resnet_face18(args.use_se)
        model = nn.DataParallel(model)
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)

        if args.optimizer == 'sgd':
            # optimizer = torch.optim.SGD([{'params': model.parameters()}, {'params': metric_fc.parameters()}],
            #                             lr=args.lr, momentum=args.mom, weight_decay=args.weight_decay)
            optimizer = InsightFaceOptimizer(
                torch.optim.SGD([{
                    'params': model.parameters()
                }, {
                    'params': metric_fc.parameters()
                }],
                                lr=args.lr,
                                momentum=args.mom,
                                weight_decay=args.weight_decay))
        else:
            optimizer = torch.optim.Adam([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    logger = get_logger()

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    train_dataset = ArcFaceDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=8)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        # One epoch's training
        train_loss, train_top1_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch,
                                            logger=logger)
        print('\nCurrent effective learning rate: {}\n'.format(optimizer.lr))
        print('Step num: {}\n'.format(optimizer.step_num))

        writer.add_scalar('model/train_loss', train_loss, epoch)
        writer.add_scalar('model/train_accuracy', train_top1_accs, epoch)
        writer.add_scalar('model/learning_rate', optimizer.lr, epoch)

        # One epoch's validation
        megaface_acc = megaface_test(model)
        writer.add_scalar('model/megaface_accuracy', megaface_acc, epoch)

        # Check if there was an improvement
        is_best = megaface_acc > best_acc
        best_acc = max(megaface_acc, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                        optimizer, best_acc, is_best)
Example #19
def train():

    device = args.device

    log_every = args.log_every
    valid_iter = args.valid_iter
    train_iter = 0
    cum_loss = 0
    avg_loss = 0
    avg_util_loss = 0
    avg_answer_loss = 0
    valid_num = 0
    patience = 0
    num_trial = 0
    hist_valid_scores = []
    begin_time = time.time()

    vocab = get_vocab(args.vocab_file)

    model = EVPI(args, vocab)

    if args.use_embed == 1:
       model.load_vector(args, vocab)

    print("Placing model on ", args.device)
    if args.device == 'cuda':
       model.cuda()

    lr = args.lr
    optim = torch.optim.Adam(list(model.parameters()), lr=lr)

    # The loss functions
    #criterion = torch.nn.CrossEntropyLoss().to(device=device)
    criterion = FocalLoss(gamma=5).to(device=device)

    print("Beginning Training")
    model.train()

    cosine_function = torch.nn.functional.cosine_similarity

    model_counter = 0
    train_iter = 0
    for ep in range(args.max_epochs):

        val_iter = 0

        count = 0
        hello = set()
        for ids, posts, questions, answers, labels in batch_iter(train_ids, \
                            post_content, qa_dict, vocab, args.batch_size, shuffle=False):

            train_iter += 1

            optim.zero_grad()

            question_vectors = vocab.id2vector(questions)
            post_vectors = vocab.id2vector(posts)
            answer_vectors = vocab.id2vector(answers)

            padded_posts, post_pad_idx = pad_sequence(args.device, posts)
            padded_questions, question_pad_idx = pad_sequence(args.device, questions)
            padded_answers, answer_pad_idx = pad_sequence(args.device, answers)

            pqa_probs = model(ids, (padded_posts, post_pad_idx),\
                      (padded_questions, question_pad_idx),\
                      (padded_answers, answer_pad_idx))

            labels = torch.tensor(labels).to(device=args.device)
            total_loss = criterion(pqa_probs, labels)

            #bp()

            avg_loss += total_loss.item()
            cum_loss += total_loss.item()

            total_loss.backward()
            torch.nn.utils.clip_grad_norm_(list(model.parameters()), args.clip_grad)
            optim.step()

            if train_iter % log_every == 0:
                print('epoch %d, iter %d, avg.loss %.6f, time elapsed %.2f'\
                     % (ep + 1, train_iter, avg_loss / log_every, time.time() - begin_time), file=sys.stderr)

                begin_time = time.time()
                avg_loss = 0

            if train_iter % valid_iter == 0:

                print('epoch %d, iter %d, cum.loss %.2f, time elapsed %.2f'\
                     % (ep + 1, train_iter, cum_loss / valid_iter, time.time() - begin_time), file=sys.stderr)

                cum_loss = 0
                valid_num += 1

                print("Begin Validation ", file=sys.stderr)

                model.eval()

                val_loss = get_val_loss(vocab, args, model)
                model.train()

                print('validation: iter %d, loss %f' % (train_iter, val_loss), file=sys.stderr)

                is_better = (len(hist_valid_scores) == 0) or (val_loss < min(hist_valid_scores))
                hist_valid_scores.append(val_loss)

                if is_better:
                    patience = 0
                    print("Save the current model and optimiser state")
                    torch.save(model, args.model_save_path)
                    #torch.save(model, args.model_save_path + '.' + str(val_loss) + '-' + str(model_counter))
                    #model_counter += 1
                    torch.save(optim.state_dict(), args.model_save_path + '.optim')

                elif patience < args.patience:

                    patience += 1
                    print('hit patience %d' % patience, file=sys.stderr)

                    if patience == args.patience:
                        num_trial += 1
                        print('hit #%d trial' % num_trial, file=sys.stderr)
                        if num_trial == args.max_num_trials:
                            print('early stop!', file=sys.stderr)
                            return

                        lr = lr * args.lr_decay

                        print('load previously best model and decay learning rate to %f' % lr, file=sys.stderr)
                        model = load(args.model_save_path)
                        model.train()

                        print('restore parameters of the optimizers', file=sys.stderr)

                        optim = torch.optim.Adam(list(model.parameters()), lr=lr)
                        optim.load_state_dict(torch.load(args.model_save_path + '.optim'))
                        for state in optim.state.values():
                            for k, v in state.items():
                                if isinstance(v, torch.Tensor):
                                    state[k] = v.to(args.device)
                        for group in optim.param_groups:
                            group['lr'] = lr

                        patience = 0
    print("Training Finished", file=sys.stderr) 
Example #20
def train_net(
    net,
    epochs=20,
    batch_size=32,
    lr=0.01,
    save_cp=True,
    gpu=True,
):

    # # setting paths
    root_data = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             'data/hand/')
    dir_checkpoint = 'checkpoints/hand/'
    writer = SummaryWriter('log/hand_batch8')

    # # setting data
    train_set = Hand(root_data, train=True)
    test_set = Hand(root_data, test=True)
    train_data = DataLoader(train_set,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=12)
    test_data = DataLoader(test_set,
                           batch_size=1,
                           shuffle=False,
                           num_workers=12)
    N_train = train_set.getLen()
    N_test = test_set.getLen()

    # # setting optimizer
    # optimizer = torch.optim.Adam(
    #     net.parameters(),
    #     lr=lr,
    #     weight_decay=1e-3)

    optimizer = optim.SGD(net.parameters(),
                          lr=lr,
                          momentum=0.9,
                          weight_decay=0.0005)

    # # setting loss fuc
    use_focal = True
    use_CE = False
    use_dice = False
    use_iou = False
    if use_focal:
        # to use focal loss
        criterion = FocalLoss(class_num=2, gamma=2)
    elif use_CE:
        # # to use CEloss with weight
        weight = torch.Tensor([2, 3])
        if gpu:
            weight = weight.cuda()
        criterion = torch.nn.CrossEntropyLoss(weight=weight)
    elif use_dice:
        criterion = soft_dice_loss
    elif use_iou:
        # to use BCE loss
        criterion1 = nn.BCELoss()
        criterion2 = mIoULoss()
    else:
        criterion = nn.BCELoss()

    processed_batch = 0

    print('''
    Starting training:
        Epochs: {}
        Batch size: {}
        Use FocalLoss: {}
        Learning rate: {}
        Training size: {}
        Validation size: {}
        Checkpoints: {}
        CUDA: {}
    '''.format(epochs, batch_size, str(use_focal), lr, N_train, N_test,
               str(save_cp), str(gpu)))

    for epoch in range(epochs):
        print('Starting epoch {}/{}.'.format(epoch + 1, epochs))
        epoch_loss = 0
        num_i = 0

        # Decay the learning rate by 10 every 10 epochs while epoch < 50
        if (epoch + 1) % 10 == 0 and epoch < 50:
            for param_group in optimizer.param_groups:
                param_group['lr'] = param_group['lr'] * 0.1
                print('NOTE!!! Learning rate is changed to ' +
                      str(param_group['lr']))

        for ii, (imgs, true_masks) in enumerate(train_data):
            num_i += 1
            processed_batch += 1

            imgs = Variable(imgs)
            true_masks = Variable(true_masks)
            if use_iou:
                true_masks_miou = Variable(to_one_hot(true_masks.long(), 2))
            if use_focal or use_CE:
                true_masks = true_masks.long()
            if gpu:
                imgs = imgs.cuda()
                true_masks = true_masks.cuda()
                if use_iou:
                    true_masks_miou = true_masks_miou.cuda()

            optimizer.zero_grad()
            masks_pred = net(imgs)

            if use_focal or use_CE:
                # # to use classification loss
                if use_CE:
                    masks_pred = masks_pred.contiguous().view(
                        masks_pred.size(0), masks_pred.size(1), -1)
                    masks_pred = masks_pred.transpose(1, 2)
                    masks_pred = masks_pred.contiguous().view(
                        -1, masks_pred.size(2)).squeeze()
                    true_masks = true_masks.contiguous().view(
                        true_masks.size(0), true_masks.size(1), -1)
                    true_masks = true_masks.transpose(1, 2)
                    true_masks = true_masks.contiguous().view(
                        -1, true_masks.size(2)).squeeze()
                loss = criterion(masks_pred, true_masks)
            elif use_dice:
                loss = criterion(masks_pred, true_masks)
            elif use_iou:
                # # combine iou and dice loss
                # channel0 = torch.ones(masks_pred.size())
                # if masks_pred.is_cuda:
                #     channel0.cuda()
                channel0 = 1 - masks_pred
                masks_pred_iou = torch.cat((channel0, masks_pred), dim=1)
                masks_pred = F.sigmoid(masks_pred)
                masks_probs_flat = masks_pred.view(-1)
                true_masks_flat = true_masks.view(-1)
                loss1 = criterion1(masks_probs_flat, true_masks_flat)
                # # the prediction needs to be expanded to two channels for the mIoU loss
                loss2 = criterion2(masks_pred_iou, true_masks_miou)
                loss = loss1.div(2) + loss2.div(2)
            else:
                masks_pred = F.sigmoid(masks_pred)
                masks_probs_flat = masks_pred.view(-1)
                true_masks_flat = true_masks.view(-1)
                loss = criterion(masks_probs_flat, true_masks_flat)

            epoch_loss += loss.item()

            writer.add_scalar('loss', loss.item(), processed_batch)

            loss.backward()
            optimizer.step()

        print('Epoch finished ! Loss: {}'.format(epoch_loss / num_i))
        writer.add_scalar('train_loss_epoch', epoch_loss / num_i, epoch + 1)

        # # test the net
        net.eval()

        # # use dice coff
        # val_score = eval_net(net, test_data, gpu, focal=use_focal, CE=use_CE, dice=use_dice)
        # print('Validation Dice Coeff: {}'.format(val_score))
        # writer.add_scalar('val_dice', val_score, epoch + 1)

        # # use Jaccard(iou) index
        val_score = calcul_iou_for_focal(net, test_data, gpu)
        print('Validation jaccard_similarity_score is : {}'.format(val_score))
        writer.add_scalar('val_iou', val_score, epoch + 1)

        net.train()

        if save_cp and val_score > 0.90:
            torch.save(
                net.state_dict(), dir_checkpoint +
                'CP{}_deeper_SE_{:.4}.pth'.format(epoch + 1, val_score))
            print('Checkpoint {} saved !'.format(epoch + 1))
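

# The FocalLoss class used throughout these examples is imported from elsewhere and not shown here.
# As a rough reference, a minimal multi-class focal loss with the same FocalLoss(class_num, gamma, alpha)
# style of constructor might look like the sketch below; the details are assumptions, not the actual
# implementation used by these snippets.
import torch
import torch.nn as nn
import torch.nn.functional as F


class FocalLossSketch(nn.Module):
    def __init__(self, class_num, gamma=2.0, alpha=None):
        super().__init__()
        self.gamma = gamma
        # per-class weights (alpha); defaults to uniform weighting
        self.alpha = alpha if alpha is not None else torch.ones(class_num)

    def forward(self, logits, targets):
        # logits: (N, C) raw scores, targets: (N,) integer class labels
        log_probs = F.log_softmax(logits, dim=1)
        probs = log_probs.exp()
        idx = targets.view(-1, 1)
        pt = probs.gather(1, idx).squeeze(1)        # probability of the true class
        log_pt = log_probs.gather(1, idx).squeeze(1)
        alpha_t = self.alpha.to(logits.device)[targets]
        # FL(p_t) = -alpha_t * (1 - p_t) ** gamma * log(p_t)
        loss = -alpha_t * (1.0 - pt) ** self.gamma * log_pt
        return loss.mean()
        # usage, e.g.: FocalLossSketch(class_num=2, gamma=2)(logits, labels)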
Example #21
def train_net(args):
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = float('-inf')
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        elif args.network == 'mobile':
            from mobilenet_v2 import MobileNetV2
            model = MobileNetV2()
        else:
            raise TypeError('network {} is not supported.'.format(
                args.network))

        metric_fc = ArcMarginModel(args)

        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                        lr=args.lr,
                                        momentum=args.mom,
                                        weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    model = nn.DataParallel(model)
    metric_fc = nn.DataParallel(metric_fc)

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    train_dataset = ArcFaceDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=num_workers)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        # Decay learning rate if there is no improvement for 2 consecutive epochs, and terminate training after 10
        if epochs_since_improvement == 10:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 2 == 0:
            checkpoint = 'BEST_checkpoint.tar'
            checkpoint = torch.load(checkpoint)
            model = checkpoint['model']
            metric_fc = checkpoint['metric_fc']
            optimizer = checkpoint['optimizer']

            adjust_learning_rate(optimizer, 0.5)

        # One epoch's training
        train_loss, train_top1_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch)
        lr = optimizer.param_groups[0]['lr']
        print('\nCurrent effective learning rate: {}\n'.format(lr))
        # print('Step num: {}\n'.format(optimizer.step_num))

        writer.add_scalar('model/train_loss', train_loss, epoch)
        writer.add_scalar('model/train_accuracy', train_top1_accs, epoch)
        writer.add_scalar('model/learning_rate', lr, epoch)

        if epoch % 5 == 0:
            # One epoch's validation
            megaface_acc = megaface_test(model)
            writer.add_scalar('model/megaface_accuracy', megaface_acc, epoch)

            # Check if there was an improvement
            is_best = megaface_acc > best_acc
            best_acc = max(megaface_acc, best_acc)
            if not is_best:
                epochs_since_improvement += 1
                print("\nEpochs since last improvement: %d\n" %
                      (epochs_since_improvement, ))
            else:
                epochs_since_improvement = 0

            # Save checkpoint
            save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                            optimizer, best_acc, is_best)
def get_focal_loss(classifier):
    print("==> Using Focal Loss.....")
    classifier.writer.add_text('Info', "Using Focal Loss ")
    return FocalLoss(gamma)  # gamma is expected to be defined at module level in the original script
def main(args, GAMMA, pretrain_model_path=None):

    # Now load pickle labels mapping file
    class_dict_fname = F_CLASS_DICT_PKL
    print(class_dict_fname)
    with open(class_dict_fname, "rb") as f:
        class_dict, _ = pickle.load(f)

    print("CLASS DICT: {}".format(class_dict))

    # Use to get numeric classes --> semantic classes
    seg_classes = class_dict
    seg_label_to_cat = {}
    print(seg_label_to_cat)
    for i, cat in enumerate(seg_classes.values()):
        seg_label_to_cat[i] = cat

    print('SEG LABEL', seg_label_to_cat)

    # First load class weights file
    with open(F_CLASS_WEIGHTS_PKL, "rb") as f:
        class_weights = pickle.load(f)
    print('SEG CLASSES', seg_classes)
    COUNTS = np.array(
        [class_weights[key] for key in list(class_weights.keys())])
    weight_normalizer = np.max(COUNTS)

    weights = []
    for count in COUNTS:
        if count != 0:
            weights.append(weight_normalizer / count)
        else:
            weights.append(0)

    # Threshold weights
    WEIGHTS_NP = np.array(weights)
    WEIGHTS_NP[WEIGHTS_NP > THRESHOLD] = THRESHOLD
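    # inverse-frequency weights are capped at THRESHOLD so extremely rare classes do not dominate the loss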

    print("WEIGHTS ARE: {}".format(WEIGHTS_NP))

    # Convert to pytorch tensor
    weights = torch.from_numpy(WEIGHTS_NP.astype('float32'))

    if USE_CLI:
        gpu = args.gpu
        multi_gpu = args.multi_gpu
        batch_size = args.batch_size
        model_name = args.model_name
        optimizer = args.optimizer
        learning_rate = args.learning_rate
        pretrain = args.pretrain
        batchsize = args.batchsize
        decay_rate = args.decay_rate
        epochs = args.epochs
    else:
        gpu = GPU
        multi_gpu = MULTI_GPU
        batch_size = BATCH_SIZE
        model_name = MODEL_NAME
        optimizer = OPTIMIZER
        learning_rate = LEARNING_RATE
        pretrain = PRETRAIN
        batchsize = BATCH_SIZE
        decay_rate = DECAY_RATE
        epochs = EPOCHS

    os.environ[
        "CUDA_VISIBLE_DEVICES"] = gpu if multi_gpu is None else '0,1,2,3'
    '''CREATE DIR'''
    experiment_dir = Path('./experiment/{}'.format(
        EXPERIMENT_HEADER.format(GAMMA)))
    experiment_dir.mkdir(exist_ok=True)
    file_dir = Path(
        str(experiment_dir) + '/%sSemSeg-' % model_name +
        str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')))
    file_dir.mkdir(exist_ok=True)
    checkpoints_dir = file_dir.joinpath('checkpoints/')
    checkpoints_dir.mkdir(exist_ok=True)
    log_dir = file_dir.joinpath('logs/')
    log_dir.mkdir(exist_ok=True)
    '''LOG'''
    if USE_CLI:
        args = parse_args()
        logger = logging.getLogger(model_name)
    else:
        logger = logging.getLogger(MODEL_NAME)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    if USE_CLI:
        file_handler = logging.FileHandler(
            str(log_dir) + '/train_%s_semseg.txt' % args.model_name)
    else:
        file_handler = logging.FileHandler(
            str(log_dir) + '/train_%s_semseg.txt' % MODEL_NAME)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.info(
        '---------------------------------------------------TRANING---------------------------------------------------'
    )
    if USE_CLI:
        logger.info('PARAMETER ...')
        logger.info(args)
    print('Load data...')
    #train_data, train_label, test_data, test_label = recognize_all_data(test_area = 5)

    # Now pickle our dataset
    if USE_CLI:
        f_in = args.data_path
    else:
        f_in = DATA_PATH

    # Now pickle file
    with open(f_in, "rb") as f:
        DATA = pickle.load(f)
        f.close()

    random_seed = 42
    indices = [i for i in range(len(list(DATA.keys())))]
    np.random.seed(random_seed)
    np.random.shuffle(indices)
    TEST_SPLIT = 0.2
    test_index = int(np.floor(TEST_SPLIT * len(list(DATA.keys()))))
    print("val index is: {}".format(test_index))
    train_indices, test_indices = indices[test_index:], indices[:test_index]
    if USE_CLI:
        print("LEN TRAIN: {}, LEN TEST: {}, EPOCHS: {}, OPTIMIZER: {}, DECAY_RATE: {}, LEARNING RATE: {}, \
        DATA PATH: {}"                      .format(len(train_indices), len(test_indices), epochs, args.optimizer, args.decay_rate, \
                              args.learning_rate, args.data_path))
    else:
        print("LEN TRAIN: {}, LEN TEST: {}, EPOCHS: {}, OPTIMIZER: {}, DECAY_RATE: {}, LEARNING RATE: {}, \
        DATA PATH: {}"                      .format(len(train_indices), len(test_indices), EPOCHS, OPTIMIZER, DECAY_RATE, \
                              LEARNING_RATE, DATA_PATH))

    # Creating PT data samplers and loaders:
    train_sampler = SubsetRandomSampler(train_indices)
    test_sampler = SubsetRandomSampler(test_indices)
    print("INTERSECTION OF TRAIN/TEST (should be 0): {}".format(
        len(set(train_indices).intersection(set(test_indices)))))

    # Training dataset
    dataset = A2D2DataLoader(DATA)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batchsize,
                                             shuffle=False,
                                             sampler=train_sampler,
                                             collate_fn=collate_fn)
    # Test dataset
    test_dataset = A2D2DataLoader(DATA)
    testdataloader = torch.utils.data.DataLoader(test_dataset,
                                                 batch_size=batchsize,
                                                 shuffle=False,
                                                 sampler=test_sampler,
                                                 collate_fn=collate_fn)

    num_classes = NUM_CLASSES

    blue = lambda x: '\033[94m' + x + '\033[0m'
    model = PointNet2SemSeg(
        num_classes) if model_name == 'pointnet2' else PointNetSeg(
            num_classes, feature_transform=True, semseg=True)

    if pretrain_model_path is not None:
        model.load_state_dict(torch.load(pretrain_model_path))
        print('load model %s' % pretrain_model_path)
        logger.info('load model %s' % pretrain_model_path)
    else:
        print('Training from scratch')
        logger.info('Training from scratch')
    init_epoch = int(pretrain[-14:-11]) if pretrain is not None else 0

    if optimizer == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    elif optimizer == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=learning_rate,
                                     betas=(0.9, 0.999),
                                     eps=1e-08,
                                     weight_decay=decay_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=20,
                                                gamma=0.5)
    LEARNING_RATE_CLIP = 1e-5
    '''GPU selection and multi-GPU'''
    if multi_gpu is not None:
        device_ids = [int(x) for x in multi_gpu.split(',')]
        torch.backends.cudnn.benchmark = True
        model.cuda(device_ids[0])
        model = torch.nn.DataParallel(model, device_ids=device_ids)
    else:
        model.cuda()

    history = defaultdict(lambda: list())
    best_acc = 0
    best_meaniou = 0
    graph_losses = []
    steps = []
    step = 0
    print("NUMBER OF EPOCHS IS: {}".format(epochs))
    for epoch in range(epochs):
        scheduler.step()
        lr = max(optimizer.param_groups[0]['lr'], LEARNING_RATE_CLIP)
        print('Learning rate:%f' % lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        counter = 0
        # Init confusion matrix
        if USE_CONMAT:
            conf_matrix = torch.zeros(NUM_CLASSES, NUM_CLASSES)
        for points, targets in tqdm(dataloader):
            #for points, target in tqdm(dataloader):
            #points, target = data
            points, targets = Variable(points.float()), Variable(
                targets.long())
            points = points.transpose(2, 1)
            points, targets = points.cuda(), targets.cuda()
            weights = weights.cuda()
            optimizer.zero_grad()  # REMOVE gradients
            model = model.train()
            if model_name == 'pointnet':
                pred, trans_feat = model(points)
            else:
                pred = model(
                    points[:, :3, :], points[:, 3:, :]
                )  # Channels: xyz_norm (first 3) | rgb_norm (second three)
                #pred = model(points)
            if USE_CONMAT:
                conf_matrix = confusion_matrix(pred, targets, conf_matrix)
            pred = pred.contiguous().view(-1, num_classes)
            targets = targets.view(-1, 1)[:, 0]
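            # a new FocalLoss module is constructed on every batch below; building it once
            # before the training loop would behave the same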
            loss = FocalLoss(gamma=GAMMA)(pred, targets)
            #loss = F.nll_loss(pred, targets, weight=weights) # Add class weights from dataset
            if model_name == 'pointnet':
                loss += feature_transform_reguliarzer(trans_feat) * 0.001
            graph_losses.append(loss.cpu().data.numpy())
            steps.append(step)
            if counter % 100 == 0:
                print("LOSS IS: {}".format(loss.cpu().data.numpy()))
            #print((loss.cpu().data.numpy()))
            history['loss'].append(loss.cpu().data.numpy())
            loss.backward()
            optimizer.step()
            counter += 1
            step += 1
            #if counter > 3:
            #     break
        if USE_CONMAT:
            print("CONFUSION MATRIX: \n {}".format(conf_matrix))
        pointnet2 = model_name == 'pointnet2'
        test_metrics, test_hist_acc, cat_mean_iou = test_semseg(model.eval(), testdataloader, seg_label_to_cat,\
                                                                num_classes = num_classes,pointnet2=pointnet2)
        mean_iou = np.mean(cat_mean_iou)
        print('Epoch %d  %s accuracy: %f  meanIOU: %f' %
              (epoch, blue('test'), test_metrics['accuracy'], mean_iou))
        logger.info('Epoch %d  %s accuracy: %f  meanIOU: %f' %
                    (epoch, 'test', test_metrics['accuracy'], mean_iou))
        if test_metrics['accuracy'] > best_acc:
            best_acc = test_metrics['accuracy']
            print("HERE")
            save_path = '%s/%s_%.3d_%.4f.pth' % (checkpoints_dir, model_name,
                                                 epoch, best_acc)
            torch.save(model.state_dict(), save_path)
            logger.info(cat_mean_iou)
            logger.info('Save model..')
            print('Save model..')
            print(cat_mean_iou)  #
        if mean_iou > best_meaniou:
            best_meaniou = mean_iou
        print('Best accuracy is: %.5f' % best_acc)
        logger.info('Best accuracy is: %.5f' % best_acc)
        print('Best meanIOU is: %.5f' % best_meaniou)
        logger.info('Best meanIOU is: %.5f' % best_meaniou)
        if USE_CONMAT:
            logger.info('Confusion matrix is: \n {}'.format(conf_matrix))

        # Plot loss vs. steps
        plt.plot(steps, graph_losses)
        plt.xlabel("Batched Steps (Batch Size = {}".format(batch_size))
        plt.ylabel("Multiclass NLL Loss")
        plt.title("NLL Loss vs. Number of Batched Steps")

        # Make directory for loss and other plots
        graphs_dir = os.path.join(experiment_dir, "graphs")
        os.makedirs(graphs_dir, exist_ok=True)

        # Save and close figure
        plt.savefig(os.path.join(graphs_dir, "losses.png"))
        plt.clf()
Example #24
          total_epoch_loss = running_loss / dataset_sizes[phase]

          if phase == 'train':
            cls_loss_values.append(total_cls_loss)
            reg_loss_values.append(total_reg_loss)
            tot_loss_values.append(total_epoch_loss)
          if phase == 'valid':
            val_cls_loss_values.append(total_cls_loss)
            val_reg_loss_values.append(total_reg_loss)
            val_tot_loss_values.append(total_epoch_loss)  
          
          print('{} rpn_cls Loss: {:.4f}       {} rpn_reg Loss: {:.4f}       {} Total Loss: {:.4f}'.format(phase, total_cls_loss,phase, total_reg_loss,phase, total_epoch_loss))

          # deep copy the model
          # if phase == 'train' and total_epoch_loss < best_loss:
          #     best_loss = total_epoch_loss
          #     model_wts = copy.deepcopy(model.state_dict())

          
      print()
  save_loss_graphs(cls_loss_values,reg_loss_values,tot_loss_values,val_cls_loss_values,val_reg_loss_values,val_tot_loss_values,start_epoch,num_epochs, exp)
  time_elapsed = time.time() - since
  print('Training complete in {:.0f}m {:.0f}s'.format(
      time_elapsed // 60, time_elapsed % 60))
  # print('Best Loss: {:4f}'.format(best_loss))
  torch.save(model,'saved_models/'+str(exp)+'/rpn_'+str(start_epoch+num_epochs-1)+'.pth')


fl = FocalLoss()
train_model(rpn, fl, 0, parser.e, parser.exp, torch.cuda.is_available())
Example #25
    checkpoint = torch.load('%s/ckpt.t7' % ckpt_path)
    net = checkpoint['net']
    best_acc = checkpoint['acc']
    start_epoch = checkpoint['epoch']
else:
    net = VGG('VGG11', 4)
    #from torchvision.models.vgg import *
    #net = vgg11_bn(num_classes=4)
tensorboard_logger.configure(log_path)
if use_cuda:
    net.cuda()
    #net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
    cudnn.benchmark = True

#criterion = nn.CrossEntropyLoss()
criterion = FocalLoss(4, use_cuda)
optimizer = optim.Adam(net.parameters(),
                       lr=args.lr,
                       betas=(0.5, 0.999),
                       weight_decay=1e-6)


def train(epoch):
    global optimizer
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        #if batch_idx > 10:
}, {
    'params': param_groups[2],
    'lr': 10 * args.lr,
    'weight_decay': args.weight_decay
}, {
    'params': param_groups[3],
    'lr': 20 * args.lr,
    'weight_decay': 0
}],
                          lr=args.lr,
                          weight_decay=args.weight_decay,
                          max_step=len(train_loader) * args.epochs)

if args.fl:
    ## focal loss
    criterion = FocalLoss()
else:
    ## frequency-based weighting
    class_weight = 45117 / torch.tensor([
        30160, 2, 9757, 1004, 4, 205, 833, 4, 252, 21, 7, 1366, 1323, 83, 56,
        26, 1, 13
    ]).cuda()
    criterion = nn.CrossEntropyLoss(weight=class_weight)
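
# both branches target class imbalance: the focal loss down-weights well-classified examples via its
# (1 - p_t) ** gamma factor, while the weighted cross entropy above rescales each class by the inverse
# of its frequency (45117 appears to be the total number of labelled samples)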

for current_epoch in range(model.epochs):

    model.epoch = current_epoch

    print("Training epoch...")
    model.train_epoch(train_loader, optimizer, criterion)
Example #27
def main():
    global args, best_prec1
    args = parser.parse_args()
    # if args.tensorboard: configure("runs/%s"%(args.name))

    # Data loading code
    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

    # Data augmentation
    if args.augment:
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])

    # transform_test = transforms.Compose([
    #     transforms.ToTensor(),
    #     normalize
    #     ])
    #normalize = transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])

    # load Data
    train_dataset = datasets.ImageFolder(train_dirs, transform_train)
    #pdb.set_trace()
    val_dataset = datasets.ImageFolder(val_dirs, transform_train)
    kwargs = {'num_workers': 0, 'pin_memory': True}
    train_loader = torch.utils.data.DataLoader(
        #datasets.CIFAR10('../data', train=True, download=True,transform=transform_train),
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs)
    val_loader = torch.utils.data.DataLoader(
        #datasets.CIFAR10('../data', train=False, transform=transform_test),
        val_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs)

    # create model
    model = dn.DenseNet3(args.layers,
                         3,
                         args.growth,
                         reduction=args.reduce,
                         bottleneck=args.bottleneck,
                         dropRate=args.droprate,
                         small_inputs=False)

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # for training on multiple GPUs.
    # Use CUDA_VISIBLE_DEVICES=0,1 to specify which GPUs to use
    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()

    # for network visualization in tensorboard
    dummy_input = torch.rand(20, 3, 200, 200).cuda()
    writer.add_graph(model, (dummy_input, ))

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    # criterion = nn.CrossEntropyLoss().cuda()
    criterion = FocalLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                nesterov=True,
                                weight_decay=args.weight_decay)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)
    print('Best accuracy: ', best_prec1)
Example #28
                          momentum=0.85,
                          nesterov=True)
    optimizer = adam  # selected optimizer

    # learning rate scheduler
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.4, patience=20, verbose=True,min_lr=1e-5)

    # class-imbalance weights for the training loss (used below as the focal loss alpha)
    counter = Counter(train_fold_labels.numpy().T.reshape(1,
                                                          -1)[0, :].tolist())
    mw = max([counter[x] for x in range(NUM_CLASSES)])
    weight = torch.tensor([mw / counter[x]
                           for x in range(NUM_CLASSES)]).to(device)
    # print ("Weights: ", [mw/counter[x] for x in range(NUM_CLASSES)])
    # loss_fn = torch.nn.CrossEntropyLoss(weight=weight)
    loss_fn = FocalLoss(class_num=2, gamma=2, alpha=weight)

    # # weighted cross entropy loss for validation dataset
    # counter = Counter(valid_fold_labels.numpy().T.reshape(1,-1)[0,:].tolist())
    # mw =  max([counter[x] for x in range(NUM_CLASSES)])
    # weight = torch.tensor([mw/counter[x] for x in range(NUM_CLASSES)]).to(device)
    # # valid_loss_fn = torch.nn.CrossEntropyLoss(weight=weight)
    # valid_loss_fn = FocalLoss(class_num=2, gamma=1, alpha=weight)

    # scale all samples according to training set
    scaler = preprocessing.MinMaxScaler().fit(train_fold_data.numpy())
    train_fold_data_normalized = torch.from_numpy(
        scaler.transform(train_fold_data.numpy())).float().to(device)
    test_fold_data_normalized = torch.from_numpy(
        scaler.transform(test_fold_data.numpy())).float().to(device)
    # valid_fold_data_normalized = torch.from_numpy(scaler.transform(valid_fold_data.numpy())).float().to(device)
def train_net(args):
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = float('-inf')
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        else:
            raise TypeError('network {} is not supported.'.format(
                args.network))

        if args.pretrained:
            model.load_state_dict(torch.load('insight-face-v3.pt'))

        model = nn.DataParallel(model)
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)

        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                        lr=args.lr,
                                        momentum=args.mom,
                                        nesterov=True,
                                        weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma)
    else:
        criterion = nn.CrossEntropyLoss()

    # Custom dataloaders
    # train_dataset = ArcFaceDataset('train')
    # train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True,
    #                                            num_workers=num_workers)
    train_dataset = ArcFaceDatasetBatched('train', img_batch_size)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size //
                                               img_batch_size,
                                               shuffle=True,
                                               num_workers=num_workers,
                                               collate_fn=batched_collate_fn)

    scheduler = MultiStepLR(optimizer, milestones=[8, 16, 24, 32], gamma=0.1)
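    # MultiStepLR multiplies the learning rate by 0.1 each time the epoch index hits 8, 16, 24 or 32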

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        lr = optimizer.param_groups[0]['lr']
        logger.info('\nCurrent effective learning rate: {}\n'.format(lr))
        # print('Step num: {}\n'.format(optimizer.step_num))
        writer.add_scalar('model/learning_rate', lr, epoch)

        # One epoch's training
        train_loss, train_top1_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch)

        writer.add_scalar('model/train_loss', train_loss, epoch)
        writer.add_scalar('model/train_accuracy', train_top1_accs, epoch)

        scheduler.step(epoch)

        if args.eval_ds == "LFW":
            from lfw_eval import lfw_test

            # One epoch's validation
            accuracy, threshold = lfw_test(model)

        elif args.eval_ds == "Megaface":
            from megaface_eval import megaface_test

            accuracy = megaface_test(model)

        else:
            accuracy = -1

        writer.add_scalar('model/evaluation_accuracy', accuracy, epoch)

        # Check if there was an improvement
        is_best = accuracy > best_acc
        best_acc = max(accuracy, best_acc)
        if not is_best:
            epochs_since_improvement += 1
            logger.info("\nEpochs since last improvement: %d\n" %
                        (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                        optimizer, best_acc, is_best, scheduler)
def run(train_sets, valid_sets, idx, save_dr):
    batch_size = 8
    imagenet_data = ImageFolder(train_sets, transform=data_transforms['train'])
    test_data = ImageFolder(valid_sets, transform=data_transforms['val'])
    data_loader = DataLoader(imagenet_data,
                             batch_size=batch_size,
                             shuffle=True)
    test_data_loader = DataLoader(test_data, batch_size=1, shuffle=True)

    cls_num = len(imagenet_data.class_to_idx)
    model = inceptionresnetv2(num_classes=1001, pretrained=None)
    model.load_state_dict(
        torch.load('/home/dsl/all_check/inceptionresnetv2-520b38e4.pth'),
        strict=True)
    model.last_linear = nn.Linear(1536, cls_num)
    model.cuda()
    state = {'learning_rate': 0.01, 'momentum': 0.9, 'decay': 0.0005}
    #optimizer = torch.optim.SGD(model.parameters(), state['learning_rate'], momentum=state['momentum'],
    #weight_decay=state['decay'], nesterov=True)

    optimizer = torch.optim.Adam(model.parameters(),
                                 state['learning_rate'],
                                 weight_decay=state['decay'],
                                 amsgrad=True)

    state['label_ix'] = imagenet_data.class_to_idx
    state['cls_name'] = idx

    state['best_accuracy'] = 0
    sch = lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                         factor=0.9,
                                         patience=3)

    focal_loss = FocalLoss(gamma=2)
    focal_loss.cuda()

    def train():
        model.train()
        loss_avg = 0.0
        progress = ProgressBar()
        ip1_loader = []
        idx_loader = []
        correct = 0
        for (data, target) in progress(data_loader):
            data, target = torch.autograd.Variable(
                data.cuda()), torch.autograd.Variable(target.cuda())
            output = model(data)
            pred = output.data.max(1)[1]
            correct += float(pred.eq(target.data).sum())
            optimizer.zero_grad()
            loss = focal_loss(output, target)
            loss.backward()
            optimizer.step()
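            # exponential moving average of the batch loss, weighted 80% toward the current batch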
            loss_avg = loss_avg * 0.2 + float(loss) * 0.8
            print(correct, len(data_loader.dataset), loss_avg)
        state['train_accuracy'] = correct / len(data_loader.dataset)
        state['train_loss'] = loss_avg

    def test():
        with torch.no_grad():
            model.eval()
            loss_avg = 0.0
            correct = 0
            for (data, target) in test_data_loader:

                data, target = torch.autograd.Variable(
                    data.cuda()), torch.autograd.Variable(target.cuda())
                output = model(data)
                loss = F.cross_entropy(output, target)
                pred = output.data.max(1)[1]
                correct += float(pred.eq(target.data).sum())
                loss_avg += float(loss)
                state['test_loss'] = loss_avg / len(test_data_loader.dataset)
                state['test_accuracy'] = correct / len(
                    test_data_loader.dataset)
            print(state['test_accuracy'])

    best_accuracy = 0.0
    for epoch in range(40):
        state['epoch'] = epoch
        train()
        test()
        sch.step(state['train_accuracy'])
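        # checkpoint decisions use the average of this epoch's train and test accuracy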
        best_accuracy = (state['train_accuracy'] + state['test_accuracy']) / 2

        if best_accuracy > state['best_accuracy']:
            state['best_accuracy'] = best_accuracy
            torch.save(model.state_dict(), os.path.join(save_dr, idx + '.pth'))
            with open(os.path.join(save_dr, idx + '.json'), 'w') as f:
                f.write(json.dumps(state))
                f.flush()
        print(state)
        print("Best accuracy: %f" % state['best_accuracy'])

        if state['test_accuracy'] == 1 and epoch > 10:
            break