Ejemplo n.º 1
0
def train(net, data_file, epochs, lr):
    """Train ``net`` on the dataset referenced by ``data_file``.

    Uses SGD with Nesterov momentum and a StepLR schedule (decay x0.1 every
    8 epochs).  The per-batch loss is logged to TensorBoard and a checkpoint
    is saved every 2 epochs via the module-level ``save_network`` helper.

    Args:
        net: model to train; moved to GPU when CUDA is available.
        data_file: path handed to ``MyDataset``.
        epochs: number of epochs to run.
        lr: initial learning rate.
    """
    transforms = T.Compose([
        T.Resize(size=(256, 256)),
        T.RandomHorizontalFlip(p=0.5),
        T.ToTensor(),
        # Per-channel means/stds, presumably precomputed on the training set.
        T.Normalize([0.56687369, 0.44000871, 0.39886727],
                    [0.2415682, 0.2131414, 0.19494878])
    ])
    dataset = MyDataset(data_file, transforms)
    model = net
    data_loader = DataLoader(dataset, batch_size=24, shuffle=True)
    if torch.cuda.is_available():
        model.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=lr,
                                momentum=0.9,
                                weight_decay=5e-4,
                                nesterov=True)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=8,
                                                   gamma=0.1)
    loss_func = nn.CrossEntropyLoss()
    model.train(True)
    # Was misleadingly named ``num_epochs``: this counts optimizer steps and
    # is used as the TensorBoard x-axis.
    global_step = 0
    writer = tb.SummaryWriter()
    for epoch in range(epochs):
        for index, data in enumerate(data_loader):
            im, label = data
            label = label.long()  # CrossEntropyLoss expects int64 targets
            if torch.cuda.is_available():
                im = im.cuda()
                label = label.cuda()
            optimizer.zero_grad()
            out = model(im)
            loss = loss_func(out, label)
            loss.backward()
            optimizer.step()
            global_step += 1
            # Log a Python float, not the tensor: logging the tensor keeps
            # the autograd graph alive for every step.
            writer.add_scalar('loss', loss.item(), global_step)
            if index % 10 == 0 or index == len(data_loader) - 1:
                # Read the lr straight from the optimizer; works on every
                # torch version, unlike the deprecated ``get_lr()``.
                current_lr = optimizer.param_groups[0]['lr']
                print(
                    '{} / {} learning rate: {} : {} / {} -----------> loss: {}'
                    .format(epoch + 1, epochs, current_lr, index + 1,
                            len(data_loader), loss.item()))
        if (epoch + 1) % 2 == 0:
            save_network(net, epoch + 1)
        lr_scheduler.step()

    writer.close()
Ejemplo n.º 2
0
def train_model(model,
                model_test,
                criterion,
                optimizer,
                scheduler,
                num_epochs=25):
    """Train a multi-view geo-localization model (satellite/street/drone[/google]).

    NOTE(review): this function reads many module-level globals not visible
    here: ``opt``, ``dataloaders``, ``dataset_sizes``, ``start_epoch``,
    ``use_gpu``, ``fp16``/``amp``, ``version`` (torch version string),
    ``y_loss``/``y_err`` history containers, plus the helpers
    ``update_average`` and ``save_network`` — confirm their definitions
    before changing behavior.

    Args:
        model: network trained in place; called with 2, 3 or 4 view tensors
            depending on ``opt.views`` and ``opt.extra_Google``.
        model_test: shadow model updated by moving average when
            ``opt.moving_avg < 1.0``.
        criterion: classification loss applied per view.
        optimizer: optimizer over ``model``'s parameters.
        scheduler: learning-rate scheduler, stepped once per epoch.
        num_epochs: upper epoch bound (training resumes at ``start_epoch``).

    Returns:
        The trained ``model``.
    """
    since = time.time()

    #best_model_wts = model.state_dict()
    #best_acc = 0.0
    warm_up = 0.1  # We start from the 0.1*lrRate
    # Number of warm-up iterations: batches-per-epoch * warm epochs.
    warm_iteration = round(dataset_sizes['satellite'] /
                           opt.batchsize) * opt.warm_epoch  # first 5 epoch

    for epoch in range(num_epochs - start_epoch):
        epoch = epoch + start_epoch
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase.
        # NOTE(review): only 'train' is iterated, so the eval branch and the
        # `phase == 'val'` forward below are currently dead code.
        for phase in ['train']:
            if phase == 'train':
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0.0
            running_corrects2 = 0.0
            running_corrects3 = 0.0
            # Iterate over data: one batch from each view dataloader in
            # lockstep; iteration stops at the shortest loader.
            for data, data2, data3, data4 in zip(dataloaders['satellite'],
                                                 dataloaders['street'],
                                                 dataloaders['drone'],
                                                 dataloaders['google']):
                # get the inputs
                inputs, labels = data
                inputs2, labels2 = data2
                inputs3, labels3 = data3
                inputs4, labels4 = data4
                now_batch_size, c, h, w = inputs.shape
                if now_batch_size < opt.batchsize:  # skip the last batch
                    continue
                if use_gpu:
                    inputs = Variable(inputs.cuda().detach())
                    inputs2 = Variable(inputs2.cuda().detach())
                    inputs3 = Variable(inputs3.cuda().detach())
                    labels = Variable(labels.cuda().detach())
                    labels2 = Variable(labels2.cuda().detach())
                    labels3 = Variable(labels3.cuda().detach())
                    if opt.extra_Google:
                        inputs4 = Variable(inputs4.cuda().detach())
                        labels4 = Variable(labels4.cuda().detach())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                if phase == 'val':
                    with torch.no_grad():
                        outputs, outputs2 = model(inputs, inputs2)
                else:
                    if opt.views == 2:
                        outputs, outputs2 = model(inputs, inputs2)
                    elif opt.views == 3:
                        if opt.extra_Google:
                            outputs, outputs2, outputs3, outputs4 = model(
                                inputs, inputs2, inputs3, inputs4)
                        else:
                            outputs, outputs2, outputs3 = model(
                                inputs, inputs2, inputs3)
                _, preds = torch.max(outputs.data, 1)
                _, preds2 = torch.max(outputs2.data, 1)

                # NOTE(review): if opt.views is neither 2 nor 3, ``loss`` is
                # never assigned and the warm-up block below raises
                # NameError — confirm opt.views is validated upstream.
                if opt.views == 2:
                    loss = criterion(outputs, labels) + criterion(
                        outputs2, labels2)
                elif opt.views == 3:
                    _, preds3 = torch.max(outputs3.data, 1)
                    loss = criterion(outputs, labels) + criterion(
                        outputs2, labels2) + criterion(outputs3, labels3)
                    if opt.extra_Google:
                        loss += criterion(outputs4, labels4)
                # backward + optimize only if in training phase.
                # Linear warm-up: scale the loss from 0.1 up to 1.0 over the
                # first ``opt.warm_epoch`` epochs.
                if epoch < opt.warm_epoch and phase == 'train':
                    warm_up = min(1.0, warm_up + 0.9 / warm_iteration)
                    loss *= warm_up

                if phase == 'train':
                    if fp16:  # we use optimier to backward loss
                        with amp.scale_loss(loss, optimizer) as scaled_loss:
                            scaled_loss.backward()
                    else:
                        loss.backward()
                    optimizer.step()
                    ##########
                    # Keep a moving-average copy of the weights in model_test.
                    if opt.moving_avg < 1.0:
                        update_average(model_test, model, opt.moving_avg)

                # statistics — loss extraction differs across torch versions.
                if int(version[0]) > 0 or int(
                        version[2]
                ) > 3:  # for the new version like 0.4.0, 0.5.0 and 1.0.0
                    running_loss += loss.item() * now_batch_size
                else:  # for the old version like 0.3.0 and 0.3.1
                    running_loss += loss.data[0] * now_batch_size
                running_corrects += float(torch.sum(preds == labels.data))
                running_corrects2 += float(torch.sum(preds2 == labels2.data))
                if opt.views == 3:
                    running_corrects3 += float(
                        torch.sum(preds3 == labels3.data))

            # Per-epoch averages, all normalized by the satellite set size.
            epoch_loss = running_loss / dataset_sizes['satellite']
            epoch_acc = running_corrects / dataset_sizes['satellite']
            epoch_acc2 = running_corrects2 / dataset_sizes['satellite']

            if opt.views == 2:
                print(
                    '{} Loss: {:.4f} Satellite_Acc: {:.4f}  Street_Acc: {:.4f}'
                    .format(phase, epoch_loss, epoch_acc, epoch_acc2))
            elif opt.views == 3:
                epoch_acc3 = running_corrects3 / dataset_sizes['satellite']
                print(
                    '{} Loss: {:.4f} Satellite_Acc: {:.4f}  Street_Acc: {:.4f} Drone_Acc: {:.4f}'
                    .format(phase, epoch_loss, epoch_acc, epoch_acc2,
                            epoch_acc3))

            y_loss[phase].append(epoch_loss)
            y_err[phase].append(1.0 - epoch_acc)
            # deep copy the model
            if phase == 'train':
                scheduler.step()
            last_model_wts = model.state_dict()
            if epoch % 20 == 19:
                save_network(model, opt.name, epoch)
            #draw_curve(epoch)

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    #print('Best val Acc: {:4f}'.format(best_acc))
    #save_network(model_test, opt.name+'adapt', epoch)

    return model
Ejemplo n.º 3
0
def train(model, criterion, optimizer, scheduler, dataloader, num_epochs,
          device):
    """Run the training loop: log per-epoch losses, periodically evaluate on
    the test set, and save intermediate plus final model weights."""
    start_time = time.time()

    # Logger writes both to console and to the experiment directory.
    logger = utils.Logger(save_dir_path)
    logger.info('-' * 10)
    logger.info(vars(args))

    for epoch in range(num_epochs):
        logger.info('Epoch {}/{}'.format(epoch + 1, num_epochs))

        model.train()
        adjust_lr(epoch)

        loss_sum = 0.0
        seen_batches = 0
        for batch_images, batch_targets in dataloader:
            seen_batches += 1

            batch_images = batch_images.to(device)
            batch_targets = batch_targets.to(device)

            optimizer.zero_grad()
            predictions = model(batch_images)

            # A list of logits means one softmax loss per stripe; sum them.
            # A plain tensor is a single-head classifier.
            if isinstance(predictions, list):
                batch_loss = sum(criterion(part, batch_targets)
                                 for part in predictions)
            elif isinstance(predictions, torch.Tensor):
                batch_loss = criterion(predictions, batch_targets)
            else:
                raise Exception('outputs type is error !')

            batch_loss.backward()
            optimizer.step()

            loss_sum += batch_loss.item() * batch_images.size(0)

        epoch_loss = loss_sum / len(dataloader.dataset.imgs)
        logger.info('Training Loss: {:.4f}'.format(epoch_loss))

        # Record the curve point for this epoch.
        logger.x_epoch_loss.append(epoch + 1)
        logger.y_train_loss.append(epoch_loss)

        is_final_epoch = (epoch + 1 == num_epochs)
        if (epoch + 1) % 20 == 0 or is_final_epoch:
            # Evaluate on the test split every 20 epochs and at the end.
            torch.cuda.empty_cache()
            CMC, mAP = test(model, args.dataset, args.dataset_path, 512)
            logger.info('Testing: top1:%.2f top5:%.2f top10:%.2f mAP:%.2f' %
                        (CMC[0], CMC[4], CMC[9], mAP))

            logger.x_epoch_test.append(epoch + 1)
            logger.y_test['top1'].append(CMC[0])
            logger.y_test['mAP'].append(mAP)
            if not is_final_epoch:
                utils.save_network(model, save_dir_path, str(epoch + 1))
        logger.info('-' * 10)

    # Persist the loss/accuracy curves.
    logger.save_curve()

    time_elapsed = time.time() - start_time
    logger.info('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    # The final weights are saved separately from the periodic checkpoints.
    utils.save_network(model, save_dir_path, 'final')
Ejemplo n.º 4
0
    def run(self):
        """Main training loop.

        Restores any previous checkpoints, then for each step: runs one or
        more hard-mining forward passes, backpropagates on the hardest batch,
        and periodically validates, writes TensorFlow summaries, and saves the
        best network.

        NOTE(review): ``xrange`` below implies this targets Python 2; porting
        to Python 3 requires ``range``.
        """
        # For each module, check we have pre-trained modules and load them
        print("-------------------------------------------------")
        print(" Looking for previous results ")
        print("-------------------------------------------------")
        for _key in ["kp", "ori", "desc", "joint"]:
            restore_network(self, _key)

        print("-------------------------------------------------")
        print(" Training ")
        print("-------------------------------------------------")

        subtask = self.config.subtask
        batch_size = self.config.batch_size
        # Resume from the best known step for this subtask.
        for step in trange(int(self.best_step[subtask]),
                           int(self.config.max_step),
                           desc="Subtask = {}".format(subtask),
                           ncols=self.config.tqdm_width):
            # ----------------------------------------
            # Forward pass: Note that we only compute the loss in the forward
            # pass. We don't do summary writing or saving
            fw_data = []
            fw_loss = []
            # The scheduler decides how many candidate batches to mine from.
            batches = self.hardmine_scheduler(self.config, step)
            for num_cur in batches:
                cur_data = self.dataset.next_batch(task="train",
                                                   subtask=subtask,
                                                   batch_size=num_cur,
                                                   aug_rot=self.use_aug_rot)
                cur_loss = self.network.forward(subtask, cur_data)
                # Sanity check
                if min(cur_loss) < 0:
                    raise RuntimeError('Negative loss while mining?')
                # Data may contain empty (zero-value) samples: set loss to zero
                # (num_cur - batch_size is negative, so this zeroes the tail).
                if num_cur < batch_size:
                    cur_loss[num_cur - batch_size:] = 0
                fw_data.append(cur_data)
                fw_loss.append(cur_loss)
            # Fill a single batch with hardest samples; with a single batch
            # cur_data from the loop above is reused as-is.
            if len(batches) > 1:
                cur_data = get_hard_batch(fw_loss, fw_data)
            # ----------------------------------------
            # Backward pass: Note that the backward pass returns summary only
            # when it is asked. Also, we manually keep note of step here, and
            # not use the tensorflow version. This is to simplify the migration
            # to another framework, if needed.
            do_validation = step % self.config.validation_interval == 0
            cur_summary = self.network.backward(subtask,
                                                cur_data,
                                                provide_summary=do_validation)
            if do_validation and cur_summary is not None:
                # Make sure we have the summary data
                # NOTE(review): this assert is redundant — the enclosing `if`
                # already guarantees cur_summary is not None.
                assert cur_summary is not None
                # Write training summary
                self.summary_writer[subtask].add_summary(cur_summary, step)
                # Do multiple rounds of validation
                cur_val_loss = np.zeros(self.config.validation_rounds)
                for _val_round in xrange(self.config.validation_rounds):
                    # Fetch validation data
                    cur_data = self.dataset.next_batch(
                        task="valid",
                        subtask=subtask,
                        batch_size=batch_size,
                        aug_rot=self.use_aug_rot)
                    # Perform validation of the model using validation data
                    cur_val_loss[_val_round] = self.network.validate(
                        subtask, cur_data)
                cur_val_loss = np.mean(cur_val_loss)
                # Inject validation result to summary
                summaries = [
                    tf.Summary.Value(
                        tag="validation/err-{}".format(subtask),
                        simple_value=cur_val_loss,
                    )
                ]
                self.summary_writer[subtask].add_summary(
                    tf.Summary(value=summaries), step)
                # Flush the writer
                self.summary_writer[subtask].flush()

                # TODO: Repeat without augmentation if necessary
                # ...

                # Keep only the best model (by validation loss) on disk.
                if cur_val_loss < self.best_val_loss[subtask]:
                    self.best_val_loss[subtask] = cur_val_loss
                    self.best_step[subtask] = step
                    save_network(self, subtask)
Ejemplo n.º 5
0
def main():
    """Adversarially train an MNIST classifier and measure DeepFool robustness.

    Builds the network described by ``FLAGS``, trains with cross-entropy plus
    an adversarial-training penalty (weighted by ``FLAGS.lmbd``), periodically
    logs DeepFool perturbation statistics and sample images, checkpoints the
    network, and finally evaluates on the test set.  Relies on module-level
    helpers (``create_network``, ``adversarial_training``, ``deepfool``,
    ``save_network``, ...) and the globals ``FLAGS`` / ``logger``.
    """
    setup_train_experiment(logger, FLAGS, "%(model)s_at")

    logger.info("Loading data...")
    data = mnist_load(FLAGS.train_size, FLAGS.seed)
    X_train, y_train = data.X_train, data.y_train
    X_val, y_val = data.X_val, data.y_val
    X_test, y_test = data.X_test, data.y_test

    img_shape = [None, 1, 28, 28]
    train_images = T.tensor4('train_images')
    train_labels = T.lvector('train_labels')
    # BUG FIX: the validation-image tensor was named 'valid_labels', clashing
    # with the label vector below; give it its own Theano name.
    val_images = T.tensor4('valid_images')
    val_labels = T.lvector('valid_labels')

    layer_dims = [int(dim) for dim in FLAGS.layer_dims.split("-")]
    num_classes = layer_dims[-1]
    net = create_network(FLAGS.model, img_shape, layer_dims=layer_dims)
    model = with_end_points(net)

    train_outputs = model(train_images)
    val_outputs = model(val_images, deterministic=True)

    # losses
    train_ce = categorical_crossentropy(train_outputs['prob'],
                                        train_labels).mean()
    train_at = adversarial_training(lambda x: model(x)['prob'],
                                    train_images,
                                    train_labels,
                                    epsilon=FLAGS.epsilon).mean()
    train_loss = train_ce + FLAGS.lmbd * train_at
    val_ce = categorical_crossentropy(val_outputs['prob'], val_labels).mean()
    val_deepfool_images = deepfool(
        lambda x: model(x, deterministic=True)['logits'],
        val_images,
        val_labels,
        num_classes,
        max_iter=FLAGS.deepfool_iter,
        clip_dist=FLAGS.deepfool_clip,
        over_shoot=FLAGS.deepfool_overshoot)

    # metrics
    train_acc = categorical_accuracy(train_outputs['logits'],
                                     train_labels).mean()
    train_err = 1.0 - train_acc
    val_acc = categorical_accuracy(val_outputs['logits'], val_labels).mean()
    val_err = 1.0 - val_acc
    # deepfool robustness: l2 norm over all non-batch axes.
    # list() keeps this valid on Python 3, where range is not a list.
    reduc_ind = list(range(1, train_images.ndim))
    l2_deepfool = (val_deepfool_images - val_images).norm(2, axis=reduc_ind)
    l2_deepfool_norm = l2_deepfool / val_images.norm(2, axis=reduc_ind)

    train_metrics = OrderedDict([('loss', train_loss), ('nll', train_ce),
                                 ('at', train_at), ('err', train_err)])
    val_metrics = OrderedDict([('nll', val_ce), ('err', val_err)])
    summary_metrics = OrderedDict([('l2', l2_deepfool.mean()),
                                   ('l2_norm', l2_deepfool_norm.mean())])

    lr = theano.shared(floatX(FLAGS.initial_learning_rate), 'learning_rate')
    train_params = get_all_params(net, trainable=True)
    train_updates = adam(train_loss, train_params, lr)

    logger.info("Compiling theano functions...")
    # list(...) around dict views: theano.function wants a list, and on
    # Python 3 dict_values cannot be concatenated with a list.
    train_fn = theano.function([train_images, train_labels],
                               outputs=list(train_metrics.values()),
                               updates=train_updates)
    val_fn = theano.function([val_images, val_labels],
                             outputs=list(val_metrics.values()))
    summary_fn = theano.function([val_images, val_labels],
                                 outputs=list(summary_metrics.values()) +
                                 [val_deepfool_images])

    logger.info("Starting training...")
    try:
        # Fixed, class-balanced subset used for the periodic visual summaries.
        samples_per_class = FLAGS.summary_samples_per_class
        summary_images, summary_labels = select_balanced_subset(
            X_val, y_val, num_classes, samples_per_class)
        save_path = os.path.join(FLAGS.samples_dir, 'orig.png')
        save_images(summary_images, save_path)

        epoch = 0
        batch_index = 0
        while epoch < FLAGS.num_epochs:
            epoch += 1

            start_time = time.time()
            train_iterator = batch_iterator(X_train,
                                            y_train,
                                            FLAGS.batch_size,
                                            shuffle=True)
            epoch_outputs = np.zeros(len(train_fn.outputs))
            for batch_index, (images,
                              labels) in enumerate(train_iterator,
                                                   batch_index + 1):
                batch_outputs = train_fn(images, labels)
                epoch_outputs += batch_outputs
            epoch_outputs /= X_train.shape[0] // FLAGS.batch_size
            logger.info(
                build_result_str(
                    "Train epoch [{}, {:.2f}s]:".format(
                        epoch,
                        time.time() - start_time), train_metrics.keys(),
                    epoch_outputs))

            # update learning rate (multiplicative decay after a grace period)
            if epoch > FLAGS.start_learning_rate_decay:
                new_lr_value = lr.get_value(
                ) * FLAGS.learning_rate_decay_factor
                lr.set_value(floatX(new_lr_value))
                logger.debug("learning rate was changed to {:.10f}".format(
                    new_lr_value))

            # validation
            start_time = time.time()
            val_iterator = batch_iterator(X_val,
                                          y_val,
                                          FLAGS.test_batch_size,
                                          shuffle=False)
            val_epoch_outputs = np.zeros(len(val_fn.outputs))
            for images, labels in val_iterator:
                val_epoch_outputs += val_fn(images, labels)
            val_epoch_outputs /= X_val.shape[0] // FLAGS.test_batch_size
            logger.info(
                build_result_str(
                    "Test epoch [{}, {:.2f}s]:".format(
                        epoch,
                        time.time() - start_time), val_metrics.keys(),
                    val_epoch_outputs))

            if epoch % FLAGS.summary_frequency == 0:
                # summary[:-1] are the scalar metrics; summary[-1] the images.
                summary = summary_fn(summary_images, summary_labels)
                logger.info(
                    build_result_str(
                        "Epoch [{}] adversarial statistics:".format(epoch),
                        summary_metrics.keys(), summary[:-1]))
                save_path = os.path.join(FLAGS.samples_dir,
                                         'epoch-%d.png' % epoch)
                df_images = summary[-1]
                save_images(df_images, save_path)

            if epoch % FLAGS.checkpoint_frequency == 0:
                save_network(net, epoch=epoch)
    except KeyboardInterrupt:
        logger.debug("Keyboard interrupt. Stopping training...")
    finally:
        # Always persist the latest weights, even on interrupt.
        save_network(net)

    # evaluate final model on test set
    test_iterator = batch_iterator(X_test,
                                   y_test,
                                   FLAGS.test_batch_size,
                                   shuffle=False)
    test_results = np.zeros(len(val_fn.outputs))
    for images, labels in test_iterator:
        test_results += val_fn(images, labels)
    test_results /= X_test.shape[0] // FLAGS.test_batch_size
    logger.info(
        build_result_str("Final test results:", val_metrics.keys(),
                         test_results))
Ejemplo n.º 6
0
def main():
    """Train a CycleGAN: two generators (A<->B) and two discriminators.

    Each iteration updates the generators jointly (identity + GAN + cycle
    losses), then each discriminator against an image-pool of past fakes.
    Per-epoch mean losses are printed and logged to TensorBoard, and all four
    networks are checkpointed every ``save_freq`` epochs via the module-level
    ``save_network`` helper.
    """
    args = args_initialize()

    save_freq = args.save_freq
    epochs = args.num_epoch
    cuda = args.cuda

    train_dataset = UnalignedDataset(is_train=True)
    train_loader = DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=0
    )

    net_G_A = ResNetGenerator(input_nc=3, output_nc=3)
    net_G_B = ResNetGenerator(input_nc=3, output_nc=3)
    net_D_A = Discriminator()
    net_D_B = Discriminator()

    if cuda:
        net_G_A = net_G_A.cuda()
        net_G_B = net_G_B.cuda()
        net_D_A = net_D_A.cuda()
        net_D_B = net_D_B.cuda()

    # Pools of previously generated fakes; stabilize discriminator training.
    fake_A_pool = ImagePool(50)
    fake_B_pool = ImagePool(50)

    criterionGAN = GANLoss(cuda=cuda)
    criterionCycle = torch.nn.L1Loss()
    criterionIdt = torch.nn.L1Loss()

    optimizer_G = torch.optim.Adam(
        itertools.chain(net_G_A.parameters(), net_G_B.parameters()),
        lr=args.lr,
        betas=(args.beta1, 0.999)
    )
    optimizer_D_A = torch.optim.Adam(net_D_A.parameters(), lr=args.lr, betas=(args.beta1, 0.999))
    optimizer_D_B = torch.optim.Adam(net_D_B.parameters(), lr=args.lr, betas=(args.beta1, 0.999))

    log_dir = './logs'
    checkpoints_dir = './checkpoints'
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(checkpoints_dir, exist_ok=True)

    writer = SummaryWriter(log_dir)

    for epoch in range(epochs):

        # Accumulated per-batch losses:
        # [G_A, D_A, G_B, D_B, cycle_A, cycle_B, idt_A, idt_B]
        running_loss = np.zeros(8)
        for batch_idx, data in enumerate(train_loader):

            input_A = data['A']
            input_B = data['B']

            if cuda:
                input_A = input_A.cuda()
                input_B = input_B.cuda()

            real_A = Variable(input_A)
            real_B = Variable(input_B)

            """
            Backward net_G
            """
            optimizer_G.zero_grad()
            lambda_idt = 0.5
            lambda_A = 10.0
            lambda_B = 10.0

            # Identity losses: feeding a generator an image from its target
            # domain should ideally leave it unchanged.
            idt_B = net_G_A(real_B)
            loss_idt_A = criterionIdt(idt_B, real_B) * lambda_B * lambda_idt

            idt_A = net_G_B(real_A)
            loss_idt_B = criterionIdt(idt_A, real_A) * lambda_A * lambda_idt

            # GAN loss = D_A(G_A(A)):
            # G_A wants its fakes to be judged real (True) by D_A.
            fake_B = net_G_A(real_A)
            pred_fake = net_D_A(fake_B)
            loss_G_A = criterionGAN(pred_fake, True)

            fake_A = net_G_B(real_B)
            pred_fake = net_D_B(fake_A)
            loss_G_B = criterionGAN(pred_fake, True)

            # Cycle-consistency: A -> fake_B -> rec_A should reconstruct A.
            rec_A = net_G_B(fake_B)
            loss_cycle_A = criterionCycle(rec_A, real_A) * lambda_A

            rec_B = net_G_A(fake_A)
            loss_cycle_B = criterionCycle(rec_B, real_B) * lambda_B

            loss_G = loss_G_A + loss_G_B + loss_cycle_A + loss_cycle_B + loss_idt_A + loss_idt_B
            loss_G.backward()

            optimizer_G.step()

            """
            update D_A
            """
            optimizer_D_A.zero_grad()
            # Draw the fake from the history pool, not only the current batch.
            fake_B = fake_B_pool.query(fake_B.data)

            pred_real = net_D_A(real_B)
            loss_D_real = criterionGAN(pred_real, True)

            pred_fake = net_D_A(fake_B.detach())
            loss_D_fake = criterionGAN(pred_fake, False)

            loss_D_A = (loss_D_real + loss_D_fake) * 0.5
            loss_D_A.backward()

            optimizer_D_A.step()

            """
            update D_B
            """
            optimizer_D_B.zero_grad()
            fake_A = fake_A_pool.query(fake_A.data)

            pred_real = net_D_B(real_A)
            loss_D_real = criterionGAN(pred_real, True)

            pred_fake = net_D_B(fake_A.detach())
            loss_D_fake = criterionGAN(pred_fake, False)

            loss_D_B = (loss_D_real + loss_D_fake) * 0.5
            loss_D_B.backward()

            optimizer_D_B.step()

            running_loss += np.array([
                loss_G_A.item(), loss_D_A.item(),
                loss_G_B.item(), loss_D_B.item(),
                loss_cycle_A.item(), loss_cycle_B.item(),
                loss_idt_A.item(), loss_idt_B.item()
            ])

        # Save checkpoints once per epoch.
        # BUG FIX: this block used to sit inside the batch loop, re-saving all
        # four networks after every single batch of a checkpoint epoch.
        if (epoch + 1) % save_freq == 0:
            save_network(net_G_A, 'G_A', str(epoch + 1))
            save_network(net_D_A, 'D_A', str(epoch + 1))
            save_network(net_G_B, 'G_B', str(epoch + 1))
            save_network(net_D_B, 'D_B', str(epoch + 1))

        running_loss /= len(train_loader)
        print('epoch %d, losses: %s' % (epoch + 1, running_loss))

        writer.add_scalar('loss_G_A', running_loss[0], epoch)
        writer.add_scalar('loss_D_A', running_loss[1], epoch)
        writer.add_scalar('loss_G_B', running_loss[2], epoch)
        writer.add_scalar('loss_D_B', running_loss[3], epoch)
        writer.add_scalar('loss_cycle_A', running_loss[4], epoch)
        writer.add_scalar('loss_cycle_B', running_loss[5], epoch)
        writer.add_scalar('loss_idt_A', running_loss[6], epoch)
        writer.add_scalar('loss_idt_B', running_loss[7], epoch)
Ejemplo n.º 7
0
def train_model(model, model_test, criterion, optimizer, scheduler, num_epochs=25):
    """Train a multi-view geo-localization model (satellite/street/drone[/google]).

    The per-view dataloaders are iterated in lockstep; a classification loss
    is computed per view and, depending on command-line flags, extra
    metric-learning losses (arcface / cosface / circle / triplet / lifted /
    contrast / sphere) are added on the L2-normalized embeddings.

    NOTE(review): relies on module-level globals -- ``opt``, ``dataloaders``,
    ``dataset_sizes``, ``use_gpu``, ``fp16``, ``version``, ``start_epoch``,
    ``y_loss``, ``y_err`` -- and on the helpers ``save_network``,
    ``update_average``, ``convert_label_to_similarity``, plus the
    ``losses`` / ``miners`` modules, ``CircleLoss`` and (when ``fp16``) ``amp``.

    Args:
        model: network being optimized; called with 2, 3 or 4 view batches
            according to ``opt.views`` / ``opt.extra_Google``.
        model_test: shadow network updated by moving average when
            ``opt.moving_avg < 1.0``.
        criterion: per-view classification loss (e.g. CrossEntropyLoss).
        optimizer: optimizer over ``model``'s parameters.
        scheduler: LR scheduler, stepped once per training phase.
        num_epochs: total epoch count; training resumes from the global
            ``start_epoch``.

    Returns:
        The trained ``model``.
    """
    since = time.time()

    warm_up = 0.1  # We start from the 0.1*lrRate
    # Warm-up length in iterations: batches per epoch * warm-up epochs.
    warm_iteration = round(dataset_sizes['satellite'] / opt.batchsize) * opt.warm_epoch  # first 5 epoch

    # Optional metric-learning criteria, each enabled by its own flag.
    if opt.arcface:
        criterion_arcface = losses.ArcFaceLoss(num_classes=opt.nclasses, embedding_size=512)
    if opt.cosface:
        criterion_cosface = losses.CosFaceLoss(num_classes=opt.nclasses, embedding_size=512)
    if opt.circle:
        criterion_circle = CircleLoss(m=0.25, gamma=32)  # gamma = 64 may lead to a better result.
    if opt.triplet:
        miner = miners.MultiSimilarityMiner()
        criterion_triplet = losses.TripletMarginLoss(margin=0.3)
    if opt.lifted:
        criterion_lifted = losses.GeneralizedLiftedStructureLoss(neg_margin=1, pos_margin=0)
    if opt.contrast:
        criterion_contrast = losses.ContrastiveLoss(pos_margin=0, neg_margin=1)
    if opt.sphere:
        criterion_sphere = losses.SphereFaceLoss(num_classes=opt.nclasses, embedding_size=512, margin=4)

    for epoch in range(num_epochs - start_epoch):
        epoch = epoch + start_epoch
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Only a training phase is run here (no validation split).
        for phase in ['train']:
            if phase == 'train':
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0.0
            running_corrects2 = 0.0
            running_corrects3 = 0.0
            # Iterate the four view loaders in lockstep.
            for data, data2, data3, data4 in zip(dataloaders['satellite'], dataloaders['street'], dataloaders['drone'],
                                                 dataloaders['google']):
                # get the inputs
                inputs, labels = data
                inputs2, labels2 = data2
                inputs3, labels3 = data3
                inputs4, labels4 = data4
                now_batch_size, c, h, w = inputs.shape
                if now_batch_size < opt.batchsize:  # skip the last batch
                    continue
                if use_gpu:
                    inputs = Variable(inputs.cuda().detach())
                    inputs2 = Variable(inputs2.cuda().detach())
                    inputs3 = Variable(inputs3.cuda().detach())
                    labels = Variable(labels.cuda().detach())
                    labels2 = Variable(labels2.cuda().detach())
                    labels3 = Variable(labels3.cuda().detach())
                    if opt.extra_Google:
                        inputs4 = Variable(inputs4.cuda().detach())
                        labels4 = Variable(labels4.cuda().detach())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                if phase == 'val':
                    with torch.no_grad():
                        outputs, outputs2 = model(inputs, inputs2)
                else:
                    if opt.views == 2:
                        outputs, outputs2 = model(inputs, inputs2)
                    elif opt.views == 3:
                        if opt.extra_Google:
                            outputs, outputs2, outputs3, outputs4 = model(inputs, inputs2, inputs3, inputs4)
                        else:
                            outputs, outputs2, outputs3 = model(inputs, inputs2, inputs3)

                # True when the model also returns an embedding per view.
                return_feature = opt.arcface or opt.cosface or opt.circle or opt.triplet or opt.contrast or opt.lifted or opt.sphere

                if opt.views == 2:
                    # NOTE(review): this branch assumes plain logits; enabling a
                    # feature-loss flag together with views==2 would feed
                    # (logits, feature) tuples to criterion -- confirm upstream.
                    _, preds = torch.max(outputs.data, 1)
                    _, preds2 = torch.max(outputs2.data, 1)
                    loss = criterion(outputs, labels) + criterion(outputs2, labels2)
                elif opt.views == 3:
                    if return_feature:
                        logits, ff = outputs
                        logits2, ff2 = outputs2
                        logits3, ff3 = outputs3
                        # L2-normalize each view's embedding before metric losses.
                        fnorm = torch.norm(ff, p=2, dim=1, keepdim=True)
                        fnorm2 = torch.norm(ff2, p=2, dim=1, keepdim=True)
                        fnorm3 = torch.norm(ff3, p=2, dim=1, keepdim=True)
                        ff = ff.div(fnorm.expand_as(ff))  # 8*512,tensor
                        ff2 = ff2.div(fnorm2.expand_as(ff2))
                        ff3 = ff3.div(fnorm3.expand_as(ff3))
                        loss = criterion(logits, labels) + criterion(logits2, labels2) + criterion(logits3, labels3)
                        _, preds = torch.max(logits.data, 1)
                        _, preds2 = torch.max(logits2.data, 1)
                        _, preds3 = torch.max(logits3.data, 1)
                        # Multiple perspectives are combined to calculate losses, please join ''--loss_merge'' in run.sh
                        if opt.loss_merge:
                            ff_all = torch.cat((ff, ff2, ff3), dim=0)
                            labels_all = torch.cat((labels, labels2, labels3), dim=0)
                        if opt.extra_Google:
                            logits4, ff4 = outputs4
                            fnorm4 = torch.norm(ff4, p=2, dim=1, keepdim=True)
                            ff4 = ff4.div(fnorm4.expand_as(ff4))
                            # Recompute classification loss including the google view.
                            loss = criterion(logits, labels) + criterion(logits2, labels2) + criterion(logits3, labels3) + criterion(logits4, labels4)
                            if opt.loss_merge:
                                ff_all = torch.cat((ff_all, ff4), dim=0)
                                labels_all = torch.cat((labels_all, labels4), dim=0)
                        if opt.arcface:
                            if opt.loss_merge:
                                loss += criterion_arcface(ff_all, labels_all)
                            else:
                                loss += criterion_arcface(ff, labels) + criterion_arcface(ff2, labels2) + criterion_arcface(ff3, labels3)  # /now_batch_size
                                if opt.extra_Google:
                                    loss += criterion_arcface(ff4, labels4)  # /now_batch_size
                        if opt.cosface:
                            if opt.loss_merge:
                                loss += criterion_cosface(ff_all, labels_all)
                            else:
                                loss += criterion_cosface(ff, labels) + criterion_cosface(ff2, labels2) + criterion_cosface(ff3, labels3)  # /now_batch_size
                                if opt.extra_Google:
                                    loss += criterion_cosface(ff4, labels4)  # /now_batch_size
                        if opt.circle:
                            if opt.loss_merge:
                                loss += criterion_circle(*convert_label_to_similarity(ff_all, labels_all)) / now_batch_size
                            else:
                                loss += criterion_circle(*convert_label_to_similarity(ff, labels)) / now_batch_size + criterion_circle(*convert_label_to_similarity(ff2, labels2)) / now_batch_size + criterion_circle(*convert_label_to_similarity(ff3, labels3)) / now_batch_size
                                if opt.extra_Google:
                                    loss += criterion_circle(*convert_label_to_similarity(ff4, labels4)) / now_batch_size
                        if opt.triplet:
                            if opt.loss_merge:
                                hard_pairs_all = miner(ff_all, labels_all)
                                loss += criterion_triplet(ff_all, labels_all, hard_pairs_all)
                            else:
                                hard_pairs = miner(ff, labels)
                                hard_pairs2 = miner(ff2, labels2)
                                hard_pairs3 = miner(ff3, labels3)
                                loss += criterion_triplet(ff, labels, hard_pairs) + criterion_triplet(ff2, labels2, hard_pairs2) + criterion_triplet(ff3, labels3, hard_pairs3)  # /now_batch_size
                                if opt.extra_Google:
                                    hard_pairs4 = miner(ff4, labels4)
                                    loss += criterion_triplet(ff4, labels4, hard_pairs4)
                        if opt.lifted:
                            if opt.loss_merge:
                                loss += criterion_lifted(ff_all, labels_all)
                            else:
                                loss += criterion_lifted(ff, labels) + criterion_lifted(ff2, labels2) + criterion_lifted(ff3, labels3)  # /now_batch_size
                                if opt.extra_Google:
                                    loss += criterion_lifted(ff4, labels4)
                        if opt.contrast:
                            if opt.loss_merge:
                                loss += criterion_contrast(ff_all, labels_all)
                            else:
                                loss += criterion_contrast(ff, labels) + criterion_contrast(ff2, labels2) + criterion_contrast(ff3, labels3)  # /now_batch_size
                                if opt.extra_Google:
                                    loss += criterion_contrast(ff4, labels4)
                        if opt.sphere:
                            if opt.loss_merge:
                                loss += criterion_sphere(ff_all, labels_all) / now_batch_size
                            else:
                                loss += criterion_sphere(ff, labels) / now_batch_size + criterion_sphere(ff2, labels2) / now_batch_size + criterion_sphere(ff3, labels3) / now_batch_size
                                if opt.extra_Google:
                                    # Bugfix: normalize by batch size like every
                                    # other sphere term above (the google-view
                                    # term was previously un-normalized).
                                    loss += criterion_sphere(ff4, labels4) / now_batch_size

                    else:
                        _, preds = torch.max(outputs.data, 1)
                        _, preds2 = torch.max(outputs2.data, 1)
                        _, preds3 = torch.max(outputs3.data, 1)
                        if opt.loss_merge:
                            # One merged cross-entropy over all views; scaled by 4
                            # to keep the magnitude comparable to the summed form.
                            outputs_all = torch.cat((outputs, outputs2, outputs3), dim=0)
                            labels_all = torch.cat((labels, labels2, labels3), dim=0)
                            if opt.extra_Google:
                                outputs_all = torch.cat((outputs_all, outputs4), dim=0)
                                labels_all = torch.cat((labels_all, labels4), dim=0)
                            loss = 4 * criterion(outputs_all, labels_all)
                        else:
                            loss = criterion(outputs, labels) + criterion(outputs2, labels2) + criterion(outputs3, labels3)
                            if opt.extra_Google:
                                loss += criterion(outputs4, labels4)

                # backward + optimize only if in training phase
                if epoch < opt.warm_epoch and phase == 'train':
                    # Linear warm-up: ramp the loss scale 0.1 -> 1.0.
                    warm_up = min(1.0, warm_up + 0.9 / warm_iteration)
                    loss *= warm_up

                if phase == 'train':
                    if fp16:  # we use optimizer to backward loss
                        with amp.scale_loss(loss, optimizer) as scaled_loss:
                            scaled_loss.backward()
                    else:
                        loss.backward()
                    optimizer.step()
                    ##########
                    # Keep the moving-average shadow model in sync.
                    if opt.moving_avg < 1.0:
                        update_average(model_test, model, opt.moving_avg)

                # statistics
                if int(version[0]) > 0 or int(version[2]) > 3:  # for the new version like 0.4.0, 0.5.0 and 1.0.0
                    running_loss += loss.item() * now_batch_size
                else:  # for the old version like 0.3.0 and 0.3.1
                    running_loss += loss.data[0] * now_batch_size
                running_corrects += float(torch.sum(preds == labels.data))
                running_corrects2 += float(torch.sum(preds2 == labels2.data))
                if opt.views == 3:
                    running_corrects3 += float(torch.sum(preds3 == labels3.data))

            epoch_loss = running_loss / dataset_sizes['satellite']
            epoch_acc = running_corrects / dataset_sizes['satellite']
            epoch_acc2 = running_corrects2 / dataset_sizes['satellite']

            if opt.views == 2:
                print('{} Loss: {:.4f} Satellite_Acc: {:.4f}  Street_Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc,
                                                                                         epoch_acc2))
            elif opt.views == 3:
                epoch_acc3 = running_corrects3 / dataset_sizes['satellite']
                print('{} Loss: {:.4f} Satellite_Acc: {:.4f}  Street_Acc: {:.4f} Drone_Acc: {:.4f}'.format(phase,
                                                                                                           epoch_loss,
                                                                                                           epoch_acc,
                                                                                                           epoch_acc2,
                                                                                                           epoch_acc3))

            y_loss[phase].append(epoch_loss)
            y_err[phase].append(1.0 - epoch_acc)
            if phase == 'train':
                scheduler.step()
            # Periodic checkpoint every 20 epochs.
            if epoch % 20 == 19:
                save_network(model, opt.name, epoch)
            # draw_curve(epoch)

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    return model
Ejemplo n.º 8
0
from parameter import get_parameter
from utils import load_network, save_network
from train import train_network
from evaluate import test_network
from hardprune import hard_prune_network
from softprune import soft_prune_network

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

if __name__ == '__main__':
    # Parse the command-line configuration and build the model.
    run_args = get_parameter()

    net = load_network(run_args)
    print(net)

    # Exactly one pipeline stage runs per invocation: training takes
    # priority, then hard pruning, then soft pruning.
    if run_args.train_flag:
        print('args.train_flag:', run_args.train_flag)
        net = train_network(net, run_args)
    elif run_args.hard_prune_flag:
        print('hard_prune_flag:', run_args.hard_prune_flag)
        net = hard_prune_network(net, run_args)
    elif run_args.soft_prune_flag:
        net = soft_prune_network(net, run_args)

    # Evaluate and persist whatever the selected stage produced.
    print(net)
    test_network(net, run_args)
    save_network(net, run_args)
Ejemplo n.º 9
0
def train(model, criterion, optimizer, scheduler, dataloder, text_loader,
          num_epochs, device, stage):
    """Stage-I joint training of the image and text branches.

    The image and text dataloaders are consumed in lockstep; every batch
    yields an image classification loss and a text classification loss,
    which are back-propagated together in a single pass.  Every second
    epoch (and on the final one) retrieval performance is measured via the
    module-level ``test`` helper.

    NOTE(review): relies on module-level globals ``utils``, ``arg``,
    ``save_dir_path`` and ``test``.

    Args:
        model: joint network; ``model(inputs, text_inputs)`` returns
            ``(image_logits, text_logits)`` and exposes a ``mode``
            attribute toggled between 'train' and 'test'.
        criterion: classification loss applied to each branch.
        optimizer: optimizer over ``model``'s parameters.
        scheduler: LR scheduler, stepped once per epoch.
        dataloder: image dataloader (name kept for existing callers).
        text_loader: text dataloader, iterated alongside ``dataloder``.
        num_epochs: number of training epochs.
        device: device the batches are moved to.
        stage: stage tag, used for logging only.
    """
    start_time = time.time()

    # Logger instance
    logger = utils.Logger(save_dir_path)
    logger.info('-' * 10)
    logger.info(vars(arg))
    logger.info('Stage: ' + stage)

    print(
        "################################### Train stage I ######################################"
    )
    for epoch in range(num_epochs):
        logger.info('Epoch {}/{}'.format(epoch + 1, num_epochs))

        model.train()
        # NOTE(review): stepped at the start of the epoch (before the
        # optimizer updates); kept as-is to preserve the original schedule.
        scheduler.step()

        ## Training
        running_loss = 0.0
        running_text_loss = 0.0
        batch_num = 0
        # Accuracy counters (only consumed by the epoch-accuracy log that
        # is currently disabled).
        img_cor = torch.zeros(1).squeeze().cuda()
        total = torch.zeros(1).squeeze().cuda()
        txt_cor = torch.zeros(1).squeeze().cuda()
        txt_total = torch.zeros(1).squeeze().cuda()

        for (inputs, labels), (text_inputs,
                               text_labels) in zip(dataloder, text_loader):
            batch_num += 1
            inputs = inputs.to(device)
            labels = labels.to(device)
            text_inputs = text_inputs.to(device)
            text_labels = text_labels.to(device, dtype=torch.int64)

            outputs, text_outs = model(inputs, text_inputs)

            ### Instance losses, one per branch.
            loss = criterion(outputs, labels)
            text_loss = criterion(text_outs, text_labels)
            optimizer.zero_grad()
            # Bugfix: single combined backward pass.  Both losses come from
            # the same forward call, so backward-ing them separately would
            # raise "backward through the graph a second time" whenever the
            # branches share parameters; summing first is gradient-equivalent
            # (gradients add) and always safe.
            (loss + text_loss).backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            running_text_loss += text_loss.item() * text_inputs.size(0)

            # Accuracy bookkeeping.
            img_pre = torch.argmax(outputs, 1)
            img_cor += (img_pre == labels).sum().float()
            total += len(labels)

            txt_pre = torch.argmax(text_outs, 1)
            txt_cor += (txt_pre == text_labels).sum().float()
            txt_total += len(text_labels)

            if batch_num % 10 == 0:
                logger.info(
                    'Train image epoch : {} [{}/{}]\t Image Loss:{:.6f}\t || Text Loss:{:.6f}'
                    .format(epoch + 1, batch_num * len(inputs),
                            len(dataloder.dataset.imgs),
                            running_loss / (batch_num * arg.batch_size),
                            running_text_loss / (batch_num * arg.batch_size)))

        logger.info('Epoch {}:Done!!!'.format(epoch + 1))

        if (epoch + 1) % 2 == 0 or epoch + 1 == num_epochs:
            ## Evaluate retrieval quality every second epoch and on the last.
            torch.cuda.empty_cache()
            model.mode = 'test'
            CMC, mAP = test(model, arg.datasets, 128)
            logger.info('Testing: Top1:%.2f Top5:%.2f Top10:%.2f mAP:%.2f' %
                        (CMC[0], CMC[4], CMC[9], mAP))
            model.mode = 'train'

        logger.info('-' * 10)
    time_elapsed = time.time() - start_time
    logger.info('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # Save final model weights.
    utils.save_network(model, save_dir_path, 'final')
Ejemplo n.º 10
0
        epoch_acc += acc
        writer_train.add_scalar('Train/Positive Activation', pa, total_iter)
        writer_train.add_scalar('Train/Negtive Activation', na, total_iter)
        writer_train.add_scalar('Train/bce_loss', bce, total_iter)
        writer_train.add_scalar('Train/categery_acc', acc, total_iter)
        total_iter += 1

    progress.finish()
    utils.clear_progressbar()

    print('[%02d] bce: %.5f | acc: %.3f (%d)' % (epoch, epoch_bce/opt.epoch_size, float(epoch_acc*100)/opt.epoch_size, epoch*opt.epoch_size*opt.batch_size))
    
    with open(os.path.join(opt.log_dir,'discriminator_losses%s.txt'  %(opt.dataset)),mode='a') as f:
        f.write('%0.8f %.4f \n' %(epoch_bce/opt.epoch_size, float(epoch_acc)/opt.epoch_size))
    # save the model
    save_network(discriminator, 'discriminator', 'last', opt.log_dir, opt.gpu_ids)
    update_learning_rate(optimizers, epoch, opt.lr_decay_iters, gamma = 0.1)

    # Testing:
    discriminator.eval()
    tsize = 50
    time_wise_pa = np.zeros((tsize, opt.n_future))
    time_wise_na = np.zeros((tsize, opt.n_future))
    time_wise_acc = np.zeros((tsize, opt.n_future))
    
    print('Testing epoch %d'%(epoch))
    progress_test = ProgressBar(widgets=widgets, maxval=tsize).start()
    for k in range(tsize):
        progress_test.update(k+1)
        x = next(testing_batch_generator)
        x = generate_sequence(x)
Ejemplo n.º 11
0
def train_model(model,
                criterion,
                optimizer,
                scheduler,
                stage=None,
                num_epochs=25):
    """Train and validate ``model``, optionally with PCB part losses.

    Runs a 'train' and a 'val' phase each epoch.  Relies on module-level
    globals: ``opt``, ``dataloaders``, ``dataset_sizes``, ``use_gpu``,
    ``fp16``, ``version``, ``start_epoch``, ``y_loss``, ``y_err`` and the
    helpers ``save_network`` / ``draw_curve`` (plus ``amp`` when ``fp16``).

    Args:
        model: network to optimize; with ``opt.PCB`` its forward returns a
            sequence of per-part logits, otherwise a single logits tensor.
        criterion: classification loss.
        optimizer: optimizer over ``model``'s parameters.
        scheduler: LR scheduler, stepped once per training phase.
        stage: stage tag ('full' / 'rpp' / ...); alters the checkpoint
            cadence and is forwarded to ``save_network`` / ``draw_curve``.
        num_epochs: total number of epochs.

    Returns:
        ``model`` with ``last_model_wts`` loaded.

    NOTE(review): ``last_model_wts = model.state_dict()`` stores live
    references (no deepcopy), so the final ``load_state_dict`` call is
    effectively a no-op; confirm whether a best-epoch snapshot was intended.
    """
    since = time.time()

    # best_model_wts = model.state_dict()
    # best_acc = 0.0
    warm_up = 0.1  # We start from the 0.1*lrRate
    warm_iteration = round(dataset_sizes['train'] /
                           opt.batchsize) * opt.warm_epoch  # first 5 epoch
    for epoch in range(num_epochs - start_epoch):
        epoch = epoch + start_epoch
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                #scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0.0
            # Iterate over data.
            for data in dataloaders[phase]:
                # get the inputs
                inputs, labels = data
                now_batch_size, c, h, w = inputs.shape
                if now_batch_size < opt.batchsize:  # skip the last batch
                    continue
                if use_gpu:
                    inputs = Variable(inputs.cuda().detach())
                    labels = Variable(labels.cuda().detach())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward (no autograd graph needed for validation)
                if phase == 'val':
                    with torch.no_grad():
                        outputs = model(inputs)
                else:
                    outputs = model(inputs)

                if not opt.PCB:
                    _, preds = torch.max(outputs.data, 1)
                    loss = criterion(outputs, labels)
                else:
                    # PCB head: ``outputs`` holds one logits tensor per body part.
                    part = {}
                    sm = nn.Softmax(dim=1)
                    num_part = opt.parts
                    for i in range(num_part):
                        part[i] = outputs[i]
                    # Predictions come from the sum of per-part softmax scores.
                    if num_part == 6:
                        score = sm(part[0]) + sm(part[1]) + sm(part[2]) + sm(
                            part[3]) + sm(part[4]) + sm(part[5])
                    elif num_part == 8:
                        score = sm(part[0]) + sm(part[1]) + sm(part[2]) + sm(
                            part[3]) + sm(part[4]) + sm(part[5]) + sm(
                                part[6]) + sm(part[7])
                    else:
                        score = sm(part[0]) + sm(part[1]) + sm(part[2]) + sm(
                            part[3])
                    _, preds = torch.max(score.data, 1)

                    # Loss is the sum of the per-part cross-entropies.
                    loss = criterion(part[0], labels)
                    for i in range(num_part - 1):
                        loss += criterion(part[i + 1], labels)

                # backward + optimize only if in training phase
                if epoch < opt.warm_epoch and phase == 'train':
                    # Linear warm-up: scale the loss from 0.1 up to 1.0.
                    warm_up = min(1.0, warm_up + 0.9 / warm_iteration)
                    loss *= warm_up

                if phase == 'train':
                    if fp16:  # we use optimier to backward loss
                        with amp.scale_loss(loss, optimizer) as scaled_loss:
                            scaled_loss.backward()
                    else:
                        loss.backward()
                    optimizer.step()
                    ##########

                # statistics (loss extraction differs across torch versions)
                if int(version[0]) > 0 or int(
                        version[2]
                ) > 3:  # for the new version like 0.4.0, 0.5.0 and 1.0.0
                    running_loss += loss.item() * now_batch_size
                else:  # for the old version like 0.3.0 and 0.3.1
                    running_loss += loss.data[0] * now_batch_size
                running_corrects += float(torch.sum(preds == labels.data))

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss,
                                                       epoch_acc))

            y_loss[phase].append(epoch_loss)
            y_err[phase].append(1.0 - epoch_acc)
            # deep copy the model
            if phase == 'val':
                last_model_wts = model.state_dict()
                # Checkpoint every 10 epochs; 'full'/'rpp' stages also save
                # at the mid-point of each 10-epoch window.
                if epoch % 10 == 9 or ((stage == 'full' or stage == 'rpp')
                                       and epoch % 10 == 4):
                    save_network(model, epoch, stage)
                draw_curve(epoch, stage)

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # print('Best val Acc: {:4f}'.format(best_acc))
    model.load_state_dict(last_model_wts)
    save_network(model, 'last', stage)

    return model
Ejemplo n.º 12
0
def train_model(model,
                criterion,
                optimizer,
                scheduler,
                start_epoch=0,
                num_epochs=25):
    """Train ``model`` (train phase only), tracking the best-loss epoch.

    Supports PCB / CPB part-based heads and an auto-augmentation schedule
    whose acceptance probability ``gamma`` ramps from 0 to 1 over the full
    run.  Relies on module-level globals: ``opt``, ``dataloaders``,
    ``dataset_sizes``, ``use_gpu``, ``fp16``, ``version``, ``y_loss``,
    ``y_err`` and the helpers ``save_network`` / ``draw_curve`` (plus
    ``amp`` when ``fp16`` is set).

    Args:
        model: network to optimize.
        criterion: classification loss.
        optimizer: optimizer over ``model``'s parameters.
        scheduler: LR scheduler, stepped at the start of each train phase.
        start_epoch: epoch index to resume from.
        num_epochs: total number of epochs.

    Returns:
        ``model`` with the weights of the best (lowest train loss) epoch
        loaded, also saved via ``save_network(..., 'last')``.
    """
    import copy  # local import: only needed to snapshot weights

    since = time.time()

    warm_up = 0.1  # We start from the 0.1*lrRate
    gamma = 0.0  # auto_aug: probability of using the augmented sample
    warm_iteration = round(dataset_sizes['train'] /
                           opt.batchsize) * opt.warm_epoch  # first 5 epoch
    total_iteration = round(
        dataset_sizes['train'] / opt.batchsize) * num_epochs

    # Bugfix: snapshot with deepcopy -- ``state_dict()`` returns live
    # references, so without the copy the "best" weights silently track the
    # current (last) weights and the final load is a no-op.
    best_model_wts = copy.deepcopy(model.state_dict())
    # Also guards against an empty epoch range (num_epochs <= start_epoch).
    last_model_wts = best_model_wts
    best_loss = 9999
    best_epoch = 0

    for epoch in range(num_epochs - start_epoch):
        epoch = epoch + start_epoch
        print('gamma: %.4f' % gamma)
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train']:
            if phase == 'train':
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0.0
            # Iterate over data.
            for data in dataloaders[phase]:
                # get the inputs
                if opt.autoaug:
                    # Randomly substitute the augmented view; the chance of
                    # keeping it grows linearly over the whole run.
                    inputs, inputs2, labels = data
                    if random.uniform(0, 1) > gamma:
                        inputs = inputs2
                    gamma = min(1.0, gamma + 1.0 / total_iteration)
                else:
                    inputs, labels = data
                now_batch_size, c, h, w = inputs.shape
                if now_batch_size < opt.batchsize:  # skip the last batch
                    continue
                # wrap them in Variable
                if use_gpu:
                    inputs = Variable(inputs.cuda().detach())
                    labels = Variable(labels.cuda().detach())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                if phase == 'val':
                    with torch.no_grad():
                        outputs = model(inputs)
                else:
                    outputs = model(inputs)

                if opt.PCB:
                    # PCB: six per-part logits; predict from summed softmaxes.
                    part = {}
                    sm = nn.Softmax(dim=1)
                    num_part = 6
                    for i in range(num_part):
                        part[i] = outputs[i]

                    score = sm(part[0]) + sm(part[1]) + sm(part[2]) + sm(
                        part[3]) + sm(part[4]) + sm(part[5])
                    _, preds = torch.max(score.data, 1)

                    loss = criterion(part[0], labels)
                    for i in range(num_part - 1):
                        loss += criterion(part[i + 1], labels)
                elif opt.CPB:
                    # CPB: three parts, same scheme as PCB.
                    part = {}
                    sm = nn.Softmax(dim=1)
                    num_part = 3
                    for i in range(num_part):
                        part[i] = outputs[i]

                    score = sm(part[0]) + sm(part[1]) + sm(part[2])
                    _, preds = torch.max(score.data, 1)

                    loss = criterion(part[0], labels)
                    for i in range(num_part - 1):
                        loss += criterion(part[i + 1], labels)
                else:
                    loss = criterion(outputs, labels)
                    # Angle/arc heads return (logits, ...): keep the logits.
                    if opt.angle or opt.arc:
                        outputs = outputs[0]
                    _, preds = torch.max(outputs.data, 1)

                # backward + optimize only if in training phase
                if epoch < opt.warm_epoch and phase == 'train':
                    # Linear warm-up: scale the loss from 0.1 up to 1.0.
                    warm_up = min(1.0, warm_up + 0.9 / warm_iteration)
                    loss *= warm_up

                if phase == 'train':
                    if fp16:  # we use optimizer to backward loss
                        with amp.scale_loss(loss, optimizer) as scaled_loss:
                            scaled_loss.backward()
                    else:
                        loss.backward()
                    optimizer.step()

                # statistics (loss extraction differs across torch versions)
                if int(version[0]) > 0 or int(
                        version[2]
                ) > 3:  # for the new version like 0.4.0, 0.5.0 and 1.0.0
                    running_loss += loss.item() * now_batch_size
                else:  # for the old version like 0.3.0 and 0.3.1
                    running_loss += loss.data[0] * now_batch_size
                running_corrects += float(torch.sum(preds == labels.data))

                # Free per-batch tensors promptly to keep peak memory down.
                del (loss, outputs, inputs, preds)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss,
                                                       epoch_acc))

            y_loss[phase].append(epoch_loss)
            y_err[phase].append(1.0 - epoch_acc)
            # Checkpoint every epoch (unwrap DataParallel if present).
            if len(opt.gpu_ids) > 1:
                save_network(model.module, opt.name, epoch + 1)
            else:
                save_network(model, opt.name, epoch + 1)
            draw_curve(epoch)

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        print()
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            best_epoch = epoch
            # Bugfix: deepcopy so this snapshot stays frozen at this epoch.
            last_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best epoch: {:d} Best Train Loss: {:4f}'.format(
        best_epoch, best_loss))

    # load best model weights
    model.load_state_dict(last_model_wts)
    save_network(model, opt.name, 'last')
    return model
Ejemplo n.º 13
0
def train_rank(model, criterion, optimizer, scheduler, dataloder, text_loader,
               num_epochs, device, stage):
    """Stage-II ranking training over paired image/text batches.

    Jointly minimizes a bidirectional triplet ("rank") loss: triplets mined
    image->text and text->image are both scored with `criterion` and summed.
    Every other epoch (and at the final epoch) the model is evaluated with
    `test(...)` and CMC/mAP are logged. Weights are saved as 'final_r'.

    Args:
        model: network returning (image_features, text_features) for a
            (image_batch, text_batch) input pair.
        criterion: triplet-style loss taking (anchor, positive, negative).
        optimizer: optimizer over `model` parameters.
        scheduler: LR scheduler stepped once per epoch.
        dataloder: loader yielding (image_inputs, labels).
        text_loader: loader yielding (text_inputs, text_labels).
        num_epochs: number of training epochs.
        device: torch device the batches are moved to.
        stage: stage name recorded in the log.
    """
    start_time = time.time()
    # Logger instance
    logger = utils.Logger(save_dir_path)
    logger.info('-' * 10)
    logger.info(vars(arg))
    logger.info('Stage: ' + stage)
    print(
        "############################ Train stage II #############################"
    )
    for epoch in range(num_epochs):
        logger.info('Epoch {}/{}'.format(epoch + 1, num_epochs))
        model.train()
        scheduler.step()
        # Training
        batch_num = 0
        loss_avg = []

        # Shuffle the two loaders in lock-step so image/text batches stay paired.
        paired = list(zip(dataloder, text_loader))
        random.shuffle(paired)
        img, txt = zip(*paired)

        for (inputs, labels), (text_inputs, text_labels) in zip(img, txt):

            batch_num += 1
            inputs = inputs.to(device)
            labels = labels.to(device)
            text_inputs = text_inputs.to(device)
            text_labels = text_labels.to(device, dtype=torch.int64)

            outputs, text_outs = model(inputs, text_inputs)

            # Mine triplets in both retrieval directions.
            anc_IT, pos_IT, neg_IT = ImageSelector(outputs, text_outs, labels)
            anc_TI, pos_TI, neg_TI = TextSelector(text_outs, outputs, labels)

            loss_rank = criterion(anc_IT, pos_IT, neg_IT) + criterion(
                anc_TI, pos_TI, neg_TI)
            optimizer.zero_grad()
            loss_rank.backward()
            optimizer.step()

            # BUGFIX: detach to a Python float. Appending the loss *tensor*
            # keeps the whole autograd graph alive and leaks GPU memory.
            loss_avg.append(loss_rank.item())
            if batch_num % 10 == 0:
                mean_loss = sum(loss_avg) / len(loss_avg)
                # BUGFIX: removed the stray ']' from the format string.
                logger.info(
                    'Stage II training : {} [{}]\t Rank_loss:{:.6f}'.format(
                        epoch + 1, batch_num * len(inputs), mean_loss))
                loss_avg = []

        if (epoch + 1) % 2 == 0 or epoch + 1 == num_epochs:
            # Testing / validating every other epoch and at the end.
            torch.cuda.empty_cache()
            # model.mode = 'test'
            CMC, mAP = test(model, arg.datasets, 128)
            logger.info('Testing: Top1:%.2f Top5:%.2f Top10:%.2f mAP:%.2f' %
                        (CMC[0], CMC[4], CMC[9], mAP))

        logger.info('-' * 10)
    time_cost = time.time() - start_time
    logger.info('Training complete in {:.0f}m {:.0f}s'.format(
        time_cost // 60, time_cost % 60))
    utils.save_network(model, save_dir_path, 'final_r')
Ejemplo n.º 14
0
def train_model(model,
                criterion,
                optimizer,
                scheduler,
                start_epoch=0,
                num_epochs=25):
    """Train `model` on (natural-language, crop[, motion]) batches.

    Each batch is tokenized with a RoBERTa tokenizer and scored by the
    module-level `compute_loss`. Supports warm-up LR scaling for the first
    `opt.warm_epoch` epochs, optional fp16 (apex amp) and SAM optimizers.
    Checkpoints are saved every 10 epochs and the weights from the
    best-(train-)loss epoch are reloaded and saved as 'last' at the end.

    Args:
        model: network whose parameters `optimizer` updates.
        criterion: kept for interface compatibility (loss comes from
            `compute_loss`).
        optimizer: optimizer (plain, apex-wrapped, or SAM).
        scheduler: LR scheduler stepped once per epoch.
        start_epoch: epoch index to resume from.
        num_epochs: total number of epochs.

    Returns:
        The model with the best-loss epoch's weights loaded.
    """
    bert_tokenizer = AutoTokenizer.from_pretrained("roberta-base")
    since = time.time()

    warm_up = 0.1  # We start from the 0.1*lrRate
    gamma = 0.0  # auto_aug
    warm_iteration = round(
        dataset_size / opt.batchsize) * opt.warm_epoch * 2  # first 5 epoch
    print(warm_iteration)

    best_loss = 9999
    best_epoch = 0
    # BUGFIX: initialize last_model_wts up front. The original assigned
    # `best_model_wts` (never read) while `load_state_dict(last_model_wts)`
    # at the end could hit an unbound name if the best-loss branch never ran.
    last_model_wts = model.state_dict()
    if opt.circle:
        criterion_circle = CircleLoss(m=0.25, gamma=32)

    for epoch in range(num_epochs - start_epoch):
        epoch = epoch + start_epoch
        print('gamma: %.4f' % gamma)
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train']:
            if phase == 'train':
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0.0
            # Iterate over data.
            with tqdm(dataloader, ascii=True) as tq:
                for data in tq:
                    # zero the parameter gradients
                    if opt.motion:
                        nl, crop, motion, nl_id, crop_id, label = data
                    else:
                        nl, crop, nl_id, crop_id, label = data
                        motion = None
                    tokens = bert_tokenizer.batch_encode_plus(
                        nl, padding='longest', return_tensors='pt')

                    optimizer.zero_grad()
                    loss = compute_loss(model, tokens['input_ids'].cuda(),
                                        tokens['attention_mask'].cuda(),
                                        crop.cuda(), motion, nl_id, crop_id,
                                        label, warm_up)
                    # Scale the loss up gradually during warm-up epochs.
                    if epoch < opt.warm_epoch and phase == 'train':
                        warm_up = min(1.0, warm_up + 0.9 / warm_iteration)
                        loss *= warm_up
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        if fp16:  # we use optimier to backward loss
                            with amp.scale_loss(loss,
                                                optimizer) as scaled_loss:
                                scaled_loss.backward()
                        else:
                            loss.backward()

                        if opt.sam:
                            # SAM needs a second forward/backward-free step.
                            optimizer.first_step(zero_grad=True)
                            loss.backward()
                            optimizer.second_step(zero_grad=True)
                        else:
                            optimizer.step()

                    # statistics
                    if int(version[0]) > 0 or int(
                            version[2]
                    ) > 3:  # for the new version like 0.4.0, 0.5.0 and 1.0.0
                        running_loss += loss.item() * opt.batchsize
                    else:  # for the old version like 0.3.0 and 0.3.1
                        # BUGFIX: `now_batch_size` was never defined in this
                        # function; use opt.batchsize as the new-version branch does.
                        running_loss += loss.data[0] * opt.batchsize

                    del (loss, tokens, data, nl, crop, nl_id, crop_id, label)
            epoch_loss = running_loss / dataset_size

            print('{} Loss: {:.4f}'.format(phase, epoch_loss))

            y_loss[phase].append(epoch_loss)
            # deep copy the model
            #if len(opt.gpu_ids)>1:
            #    save_network(model.module, opt.name, epoch+1)
            #else:
            if epoch % 10 == 0:
                save_network(model, opt.name, epoch + 1)
            draw_curve(epoch)

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        print()
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            best_epoch = epoch
            last_model_wts = model.state_dict()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best epoch: {:d} Best Train Loss: {:4f}'.format(
        best_epoch, best_loss))

    # load best model weights
    model.load_state_dict(last_model_wts)
    save_network(model, opt.name, 'last')
    return model