Example #1
def main():

    # Paper: In the training phase, we set the batch size to 128,
    # base learning rate to 10^-3, weight decay to 5×10^-4, and momentum to 0.9

    parser = argparse.ArgumentParser(description='Train - Evaluate DeepCORAL model')
    parser.add_argument('--disable_cuda', action='store_true',
                        help='Disable CUDA')
    parser.add_argument('--epochs', type=int, default=50,
                        help='Number of total epochs to run')
    parser.add_argument('--backbone_network', type=str, default='alexnet',
                        help='Backbone CNN')
    parser.add_argument('--batch_size', type=int, default=128,
                        help='Batch size')
    parser.add_argument('--lr', type=float, default=1e-3,
                        help='Learning Rate')
    parser.add_argument('--decay', type=float, default=5e-4,
                        help='Weight decay (L2 penalty)')
    parser.add_argument('--momentum', type=float, default=0.9,
                        help="Optimizer's momentum")
    parser.add_argument('--lambda_coral', type=float, default=0.5,
                        help="Weight that trades off the adaptation with "
                             "classification accuracy on the source domain")
    parser.add_argument('--source', default='amazon',
                        help="Source Domain (dataset)")
    parser.add_argument('--target', default='webcam',
                        help="Target Domain (dataset)")

    args = parser.parse_args()
    args.device = None

    if not args.disable_cuda and torch.cuda.is_available():
        args.device = torch.device('cuda')
    else:
        args.device = torch.device('cpu')

    if args.backbone_network in ('alexnet', 'resnet50'):
        if args.source == 'ub':
            source_data_dir = '/home/alejandro/ub/journal_2019/split/domain_adaptation/ub/static/01/train'
        else:
            source_data_dir = None

        if args.target == 'thomaz':
            target_data_dir = '/home/alejandro/ub/journal_2019/split/domain_adaptation/thomaz/static/01/train'
        else:
            target_data_dir = None

        source_train_loader = get_loader(name_dataset=args.source, batch_size=args.batch_size, train=True,
                                         data_dir=source_data_dir)
        target_train_loader = get_loader(name_dataset=args.target, batch_size=args.batch_size, train=True,
                                         data_dir=target_data_dir)

        source_evaluate_loader = get_loader(name_dataset=args.source, batch_size=args.batch_size, train=False,
                                            data_dir=source_data_dir)
        target_evaluate_loader = get_loader(name_dataset=args.target, batch_size=args.batch_size, train=False,
                                            data_dir=target_data_dir)

        n_classes = len(source_train_loader.dataset.classes)
    else:
        source_train_dataset = FeaturesDataset(split_fpath='/home/alejandro/ub/journal_2019/split/domain_adaptation/ub/static/01/cached_fold-01_train.npz')
        target_train_dataset = FeaturesDataset(split_fpath='/home/alejandro/ub/journal_2019/split/domain_adaptation/thomaz/static/01/cached_fold-01_train.npz')

        source_train_loader = torch.utils.data.DataLoader(source_train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=4)
        target_train_loader = torch.utils.data.DataLoader(target_train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=4)

        source_evaluate_dataset = FeaturesDataset(split_fpath='/home/alejandro/ub/journal_2019/split/domain_adaptation/ub/static/01/cached_fold-01_train.npz')
        target_evaluate_dataset = FeaturesDataset(split_fpath='/home/alejandro/ub/journal_2019/split/domain_adaptation/thomaz/static/01/cached_fold-01_train.npz')

        source_evaluate_loader = torch.utils.data.DataLoader(source_evaluate_dataset, batch_size=args.batch_size,
                                                             shuffle=False, num_workers=4)
        target_evaluate_loader = torch.utils.data.DataLoader(target_evaluate_dataset, batch_size=args.batch_size,
                                                             shuffle=False, num_workers=4)
        n_classes = 17

    # ~ Paper : "We initialized the other layers with the parameters pre-trained on ImageNet"
    # check https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py
    if args.backbone_network == 'alexnet':
        model = alexnet(pretrained=True)

        # ~ Paper : The dimension of last fully connected layer (fc8) was set to the number of categories (31)
        model.classifier[6] = nn.Linear(4096, n_classes)

        # ~ Paper : and initialized with N(0, 0.005)
        torch.nn.init.normal_(model.classifier[6].weight, mean=0, std=5e-3)

        # Initialize bias to small constant number (http://cs231n.github.io/neural-networks-2/#init)
        model.classifier[6].bias.data.fill_(0.01)

        model = model.to(device=args.device)

        # ~ Paper : "The learning rate of fc8 is set to 10 times the other layers as it was training from scratch."
        optimizer = torch.optim.SGD([
            {'params': model.features.parameters()},
            {'params': model.classifier[:6].parameters()},
            {'params': model.classifier[6].parameters(), 'lr': 10 * args.lr}
        ], lr=args.lr, momentum=args.momentum,
           weight_decay=args.decay)  # if not specified, the default lr is used

    elif args.backbone_network == 'resnet50':
        model = resnet50(pretrained=True)

        # ~ Paper : The dimension of last fully connected layer (fc8) was set to the number of categories (31)
        model.fc = nn.Linear(2048, n_classes)

        # ~ Paper : and initialized with N(0, 0.005)
        torch.nn.init.normal_(model.fc.weight, mean=0, std=5e-3)

        # Initialize bias to small constant number (http://cs231n.github.io/neural-networks-2/#init)
        model.fc.bias.data.fill_(0.01)

        model = model.to(device=args.device)

        # ~ Paper : "The learning rate of fc8 is set to 10 times the other layers as it was training from scratch."
        optimizer = torch.optim.SGD([
            {'params': model.layer4.parameters()},
            {'params': model.fc.parameters(), 'lr': 10 * args.lr}
        ], lr=args.lr, momentum=args.momentum,
           weight_decay=args.decay)  # if not specified, the default lr is used
    else:
        model = FrozenCNN()

        # ~ Paper : The dimension of last fully connected layer (fc8) was set to the number of categories (31)
        model.classifier[0] = nn.Linear(2048, n_classes)

        # ~ Paper : and initialized with N(0, 0.005)
        torch.nn.init.normal_(model.classifier[0].weight, mean=0, std=5e-3)

        # Initialize bias to small constant number (http://cs231n.github.io/neural-networks-2/#init)
        model.classifier[0].bias.data.fill_(0.01)

        model = model.to(device=args.device)

        # ~ Paper : "The learning rate of fc8 is set to 10 times the other layers as it was training from scratch."
        optimizer = torch.optim.SGD([
            {'params': model.classifier[0].parameters(), 'lr': 10 * args.lr}
        ], lr=args.lr, momentum=args.momentum,
           weight_decay=args.decay)  # if not specified, the default lr is used

    tracker = Tracker()

    for i in range(args.epochs):
        train(model, optimizer, source_train_loader, target_train_loader, tracker, args, i)
        evaluate(model, source_evaluate_loader, 'source', tracker, args, i)
        evaluate(model, target_evaluate_loader, 'target', tracker, args, i)

    # Save logged classification loss, coral loss, source accuracy, target accuracy
    log_file = "{}_coral-loss:{}_{}-{}_log.pth".format(args.backbone_network, args.lambda_coral, args.source,
                                                       args.target)
    torch.save(tracker.to_dict(), log_file)
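
# Hedged sketch: `train` (not shown here) presumably combines cross-entropy on
# the source batch with the CORAL loss that `lambda_coral` weights. A minimal
# CORAL loss in the spirit of the Deep CORAL paper (illustrative names, not
# necessarily this repo's exact API):
def coral_loss(source, target):
    # source, target: (batch, d) activations of the layer being aligned.
    d = source.size(1)

    def covariance(x):
        n = x.size(0)
        centered = x - x.mean(dim=0, keepdim=True)
        return centered.t() @ centered / (n - 1)

    c_s, c_t = covariance(source), covariance(target)
    # Squared Frobenius distance between the covariances, scaled by 1/(4 d^2).
    return ((c_s - c_t) ** 2).sum() / (4 * d * d)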
Example #2
def main(output_dir, n_attentions, image_shape, batch_size, learning_rate,
         gpu):
    """Perform model training"""

    # initialize the dataset
    train_set = TrainDataset(phase='train', shape=image_shape)
    val_set = TrainDataset(phase='val', shape=image_shape)
    train_loader = DataLoader(train_set,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=8,
                              pin_memory=True)
    val_loader = DataLoader(val_set,
                            batch_size=batch_size,
                            shuffle=False,  # evaluation order does not matter
                            num_workers=8,
                            pin_memory=True)

    # initialize the model
    model = Model(n_classes=196,
                  input_size=image_shape,
                  n_attentions=n_attentions,
                  gpu=gpu)
    if gpu:
        model = model.cuda()

    # initialize related optimization methods
    criterion = nn.CrossEntropyLoss()
    criterion_attention = nn.MSELoss()
    optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
    feature_center = torch.zeros(196, n_attentions * 2208)
    scheduler = SuperConvergence(optimizer,
                                 max_lr=learning_rate,
                                 stepsize=5000,
                                 better_as_larger=False,
                                 last_epoch=-1)
    if gpu:
        feature_center = feature_center.cuda()

    # initialize other hyperparameters
    crop_threshold = 0.5
    drop_threshold = 0.5
    focal_weight = 0.4

    # perform the training
    epoch = 0
    while True:
        print('Starting epoch {:03d}'.format(epoch))

        # statistic tracking
        train_loss_tracker = Tracker()
        train_accuracy_tracker = Tracker()

        model = model.train()
        for idx, (X, y) in enumerate(train_loader):
            if gpu:
                X = X.cuda()
                y = y.cuda()

            mini_batch = X.size(0)
            logits, feature_matrix, sampled_attentions = model(X)

            loss = (criterion(logits, y) +
                    criterion_attention(feature_matrix, feature_center[y]))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            feature_center[y] = feature_center[y] + (
                focal_weight * (feature_matrix.detach() - feature_center[y]))

            preds, _ = get_predictions(logits.squeeze().cpu().data.numpy())
            preds = np.array(preds) == y.cpu().squeeze().data.numpy()
            accuracy = np.mean(preds)

            train_loss_tracker.step(loss.item() * mini_batch, mini_batch)
            train_accuracy_tracker.step(accuracy * mini_batch, mini_batch)

            # perform data cropping
            with torch.no_grad():
                crop_attentions = F.interpolate(
                    sampled_attentions.unsqueeze(1),
                    size=image_shape,
                    mode='bilinear',
                    align_corners=False)
                crop_attentions = crop_attentions > crop_threshold
                cropped_images = []
                for _idx in range(crop_attentions.size(0)):
                    positive_indices = torch.nonzero(crop_attentions[_idx])
                    x_min = torch.min(positive_indices[:, 2])
                    y_min = torch.min(positive_indices[:, 1])
                    x_max = torch.max(positive_indices[:, 2])
                    y_max = torch.max(positive_indices[:, 1])
                    cropped_image = F.interpolate(
                        crop_attentions[_idx, :, y_min:y_max + 1,
                                        x_min:x_max + 1].float().unsqueeze(0) *
                        X[_idx, :, y_min:y_max + 1,
                          x_min:x_max + 1].unsqueeze(0),
                        size=image_shape,
                        mode='bilinear',
                        align_corners=False)
                    cropped_images.append(cropped_image)
                cropped_images = torch.cat(cropped_images, dim=0)

            logits, _, _ = model(cropped_images)
            loss = criterion(logits, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # perform attention dropping
            with torch.no_grad():
                drop_attentions = F.interpolate(
                    sampled_attentions.unsqueeze(1),
                    size=image_shape,
                    mode='bilinear',
                    align_corners=False)
                drop_attentions = (drop_attentions < drop_threshold).float()
                dropped_images = drop_attentions * X

            logits, _, _ = model(dropped_images)
            loss = criterion(logits, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            stop = (epoch == 10)
            scheduler.step(epoch=None,
                           metrics=train_loss_tracker.get_average(),
                           stop=stop)

            if idx % 100 == 0:
                _temp_lr = optimizer.param_groups[0]['lr']
                print('Batch {}, average loss {} - average accuracy {}, lr {}'.
                      format(idx, train_loss_tracker.get_average(),
                             train_accuracy_tracker.get_average(), _temp_lr))

        # do validation pass
        val_loss_tracker = Tracker()
        val_accuracy_tracker = Tracker()

        model = model.eval()
        for X_val, y_val in val_loader:
            if gpu:
                X_val = X_val.cuda()
                y_val = y_val.cuda()

            mini_batch = X_val.size(0)

            with torch.no_grad():
                logits, _, _ = model(X_val)
                val_loss = criterion(logits, y_val)

                preds, _ = get_predictions(logits.squeeze().cpu().data.numpy())
                preds = np.array(preds) == y_val.cpu().squeeze().data.numpy()
                accuracy = np.mean(preds)

                val_loss_tracker.step(val_loss.item() * mini_batch, mini_batch)
                val_accuracy_tracker.step(accuracy * mini_batch, mini_batch)

        state_dict = {
            'n_classes': 196,
            'input_size': image_shape,
            'n_attentions': n_attentions,
            'state_dict': model.state_dict()
        }
        torch.save(state_dict,
                   os.path.join(output_dir, '{:03d}.ckpt'.format(epoch)))
        print('Validation - loss {}, accuracy {}'.format(
            val_loss_tracker.get_average(),
            val_accuracy_tracker.get_average()))
        epoch += 1
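
# Hedged sketch: the `Tracker` used above is not defined in this snippet (and
# the one in Example #3 exposes a different add/get_mean interface). A minimal
# weighted running average consistent with the step/get_average calls here:
class Tracker:
    def __init__(self):
        self.total = 0.0
        self.count = 0

    def step(self, weighted_value, weight):
        # Callers above pass `metric * mini_batch` and `mini_batch`.
        self.total += weighted_value
        self.count += weight

    def get_average(self):
        return self.total / max(self.count, 1)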
Example #3
    def train_model(self,
                    max_iterations=1e6,
                    loss_freq=50,
                    eval_freq=2000,
                    save_freq=1e5,
                    max_gradient_norm=0.25,
                    no_model_checkpoints=False):

        parameters_to_optimize = self.model.parameters()

        # Setup dictionary to save evaluation details in
        checkpoint_dict = self.load_recent_model()
        start_iter = get_param_val(
            checkpoint_dict, "iteration", 0,
            warning_if_default=False)  # Iteration to start from
        evaluation_dict = get_param_val(
            checkpoint_dict,
            "evaluation_dict",
            dict(),
            warning_if_default=False
        )  # Dictionary containing validation performances over time
        best_save_dict = get_param_val(checkpoint_dict,
                                       "best_save_dict", {
                                           "file": None,
                                           "metric": 1e6,
                                           "detailed_metrics": None,
                                           "test": None
                                       },
                                       warning_if_default=False)  # Best checkpoint so far (file, metric, detailed metrics, test scores)
        best_save_iter = best_save_dict["file"]
        last_save = None if start_iter == 0 else self.get_checkpoint_filename(
            start_iter)
        if last_save is not None and not os.path.isfile(last_save):
            print(
                "[!] WARNING: Could not find last checkpoint file specified as "
                + last_save)
            last_save = None
        test_NLL = None  # Possible test performance determined in the end of the training

        # Initialize tensorboard writer
        writer = SummaryWriter(self.checkpoint_path)

        # Function for saving model. Add here in the dictionary necessary parameters that should be saved
        def save_train_model(iteration, only_weights=True):
            if no_model_checkpoints:
                return
            checkpoint_dict = {
                "iteration": iteration,
                "best_save_dict": best_save_dict,
                "evaluation_dict": evaluation_dict
            }
            self.save_model(iteration,
                            checkpoint_dict,
                            save_optimizer=not only_weights)

        # Function to export the current results to a txt file
        def export_result_txt():
            if best_save_iter is not None:
                with open(os.path.join(self.checkpoint_path, "results.txt"),
                          "w") as f:
                    f.write("Best validation performance: %s\n" %
                            (str(best_save_dict["metric"])))
                    f.write(
                        "Best iteration: %i\n" %
                        int(str(best_save_iter).split("_")[-1].split(".")[0]))
                    f.write("Best checkpoint: %s\n" % str(best_save_iter))
                    f.write("Detailed metrics\n")
                    for metric_name, metric_val in best_save_dict[
                            "detailed_metrics"].items():
                        f.write("-> %s: %s\n" % (metric_name, str(metric_val)))
                    if "test" in best_save_dict and best_save_dict[
                            "test"] is not None:
                        f.write("Test - Detailed metrics\n")
                        for metric_name, metric_val in best_save_dict[
                                "test"].items():
                            f.write("[TEST] -> %s: %s\n" %
                                    (metric_name, str(metric_val)))
                    f.write("\n")

        # "Trackers" are moving averages. We use them to log the loss and time needed per training iteration
        time_per_step = Tracker()
        train_losses = Tracker()

        # Try-catch if user terminates
        try:
            index_iter = -1
            self.model.eval()
            self.task.initialize()
            print("=" * 50 + "\nStarting training...\n" + "=" * 50)
            self.model.train()

            print("Performing initial evaluation...")
            self.model.eval()
            eval_NLL, detailed_scores = self.task.eval(initial_eval=True)
            self.model.train()
            write_dict_to_tensorboard(writer,
                                      detailed_scores,
                                      base_name="eval",
                                      iteration=start_iter)

            for index_iter in range(start_iter, int(max_iterations)):

                # Training step
                start_time = time.time()
                loss = self.task.train_step(iteration=index_iter)
                self.optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(parameters_to_optimize,
                                               max_gradient_norm)
                if self.model.model_name in ["DAF", "DBF"]:
                    torch.nn.utils.clip_grad_norm_(self.model.base_log_probs,
                                                   max_gradient_norm)
                self.optimizer.step()
                if self.optimizer.param_groups[0]['lr'] > self.lr_minimum:
                    self.lr_scheduler.step()
                end_time = time.time()

                time_per_step.add(end_time - start_time)
                train_losses.add(loss.item())

                # Statement for detecting NaN values
                if torch.isnan(loss).item():
                    print("[!] ERROR: Loss is NaN!" + str(loss.item()))
                for name, param in self.model.named_parameters():
                    if param.requires_grad:
                        if torch.isnan(param).sum() > 0:
                            print("[!] ERROR: Parameter %s has %s NaN values!\n" % (name, str(torch.isnan(param).sum())) + \
                               "Grad values NaN: %s.\n" % (str(torch.isnan(param.grad).sum()) if param.grad is not None else "no gradients") + \
                               "Grad values avg: %s.\n" % (str(param.grad.abs().mean()) if param.grad is not None else "no gradients") + \
                               "Last loss: %s" % (str(loss)))

                # Printing current loss etc. for debugging
                if (index_iter + 1) % loss_freq == 0:
                    loss_avg = train_losses.get_mean(reset=True)
                    bpd_avg = self.task.loss_to_bpd(loss_avg)
                    train_time_avg = time_per_step.get_mean(reset=True)
                    max_memory = torch.cuda.max_memory_allocated(
                        device=get_device(
                        )) / 1.0e9 if torch.cuda.is_available() else -1
                    print(
                        "Training iteration %i|%i (%4.2fs). Loss: %6.5f, Bpd: %6.4f [Mem: %4.2fGB]"
                        % (index_iter + 1, max_iterations, train_time_avg,
                           loss_avg, bpd_avg, max_memory))
                    writer.add_scalar("train/loss", loss_avg, index_iter + 1)
                    writer.add_scalar("train/bpd", bpd_avg, index_iter + 1)
                    writer.add_scalar("train/learning_rate",
                                      self.optimizer.param_groups[0]['lr'],
                                      index_iter + 1)
                    writer.add_scalar("train/training_time", train_time_avg,
                                      index_iter + 1)

                    self.task.add_summary(writer,
                                          index_iter + 1,
                                          checkpoint_path=self.checkpoint_path)

                # Performing evaluation every "eval_freq" steps
                if (index_iter + 1) % eval_freq == 0:
                    self.model.eval()
                    eval_NLL, detailed_scores = self.task.eval()
                    self.model.train()

                    write_dict_to_tensorboard(writer,
                                              detailed_scores,
                                              base_name="eval",
                                              iteration=index_iter + 1)

                    # If model performed better on validation than any other iteration so far => save it and eventually replace old model
                    if eval_NLL < best_save_dict["metric"]:
                        best_save_iter = self.get_checkpoint_filename(
                            index_iter + 1)
                        best_save_dict["metric"] = eval_NLL
                        best_save_dict["detailed_metrics"] = detailed_scores
                        if not os.path.isfile(best_save_iter):
                            print("Saving model at iteration " +
                                  str(index_iter + 1))
                            if best_save_dict[
                                    "file"] is not None and os.path.isfile(
                                        best_save_dict["file"]):
                                print("Removing checkpoint %s..." %
                                      best_save_dict["file"])
                                os.remove(best_save_dict["file"])
                            if last_save is not None and os.path.isfile(
                                    last_save):
                                print("Removing checkpoint %s..." % last_save)
                                os.remove(last_save)
                            best_save_dict["file"] = best_save_iter
                            last_save = best_save_iter
                            save_train_model(index_iter + 1)
                        self.task.export_best_results(self.checkpoint_path,
                                                      index_iter + 1)
                        export_result_txt()
                    evaluation_dict[index_iter + 1] = best_save_dict["metric"]

                # Independent of evaluation, the model is saved every "save_freq" steps. This prevents loss of information if model does not improve for a while
                if (index_iter + 1) % save_freq == 0 and not os.path.isfile(
                        self.get_checkpoint_filename(index_iter + 1)):
                    save_train_model(index_iter + 1)
                    if last_save is not None and os.path.isfile(
                            last_save) and last_save != best_save_iter:
                        print("Removing checkpoint %s..." % last_save)
                        os.remove(last_save)
                    last_save = self.get_checkpoint_filename(index_iter + 1)
            ## End training loop

            # Before testing, load best model and check whether its validation performance is in the right range (to prevent major loading issues)
            if not no_model_checkpoints and best_save_iter is not None:
                load_model(best_save_iter,
                           model=self.model,
                           optimizer=self.optimizer,
                           lr_scheduler=self.lr_scheduler)
                eval_NLL, detailed_scores = self.task.eval()
                if eval_NLL != best_save_dict["metric"]:
                    if abs(eval_NLL - best_save_dict["metric"]) > 1e-1:
                        print(
                            "[!] WARNING: new evaluation significantly differs from saved one (%s vs %s)! Probably a mistake in the saving/loading part..."
                            % (str(eval_NLL), str(best_save_dict["metric"])))
                    else:
                        print(
                            "[!] WARNING: new evaluation sligthly differs from saved one (%s vs %s)."
                            % (str(eval_NLL), str(best_save_dict["metric"])))
            else:
                print("Using last model as no models were saved...")

            # Testing the trained model
            test_NLL, detailed_scores = self.task.test()
            print("=" * 50 + "\nTest performance: %lf" % (test_NLL))
            detailed_scores["original_NLL"] = test_NLL
            best_save_dict["test"] = detailed_scores
            self.task.finalize_summary(writer, max_iterations,
                                       self.checkpoint_path)

        # If user terminates training early, replace last model saved per "save_freq" steps by current one
        except KeyboardInterrupt:
            if index_iter > 0:
                print(
                    "User keyboard interrupt detected. Saving model at step %i..."
                    % (index_iter))
                save_train_model(index_iter + 1)
            else:
                print(
                    "User keyboard interrupt detected before starting to train."
                )
            if last_save is not None and os.path.isfile(last_save) and not any(
                [val == last_save for _, val in best_save_dict.items()]):
                os.remove(last_save)

        export_result_txt()

        writer.close()
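
# Hedged sketch: `get_param_val` (used above to read checkpoint entries) is
# not shown. A plausible minimal version based on how it is called:
def get_param_val(param_dict, key, default, warning_if_default=True):
    # Read `key` from a possibly-missing checkpoint dict, else use `default`.
    if param_dict is not None and key in param_dict:
        return param_dict[key]
    if warning_if_default:
        print("[#] WARNING: using default value %s for '%s'." % (str(default), key))
    return default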
Example #4
def init_model(args):
    import os
    print(os.getcwd())
    source_train_loader = get_loader(name_dataset=args.source,
                                     batch_size=args.batch_size,
                                     train=True,
                                     path="../../data/OfficeCaltech/images/")
    target_train_loader = get_loader(name_dataset=args.target,
                                     batch_size=args.batch_size,
                                     train=True,
                                     path="../../data/OfficeCaltech/images/")

    source_evaluate_loader = get_loader(
        name_dataset=args.source,
        batch_size=args.batch_size,
        train=False,
        path="../../data/OfficeCaltech/images/")
    target_evaluate_loader = get_loader(
        name_dataset=args.target,
        batch_size=args.batch_size,
        train=False,
        path="../../data/OfficeCaltech/images/")

    n_classes = len(source_train_loader.dataset.classes)

    # ~ Paper : "We initialized the other layers with the parameters pre-trained on ImageNet"
    # check https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py
    model = alexnet(pretrained=True)
    # ~ Paper : The dimension of last fully connected layer (fc8) was set to the number of categories (31)
    model.classifier[6] = nn.Linear(4096, n_classes)
    # ~ Paper : and initialized with N(0, 0.005)
    torch.nn.init.normal_(model.classifier[6].weight, mean=0, std=5e-3)

    # Initialize bias to small constant number (http://cs231n.github.io/neural-networks-2/#init)
    model.classifier[6].bias.data.fill_(0.01)

    model = model.to(device=args.device)

    # ~ Paper : "The learning rate of fc8 is set to 10 times the other layers as it was training from scratch."
    optimizer = torch.optim.SGD(
        [
            {
                'params': model.features.parameters()
            },
            {
                'params': model.classifier[:6].parameters()
            },
            # fc8 -> 7th element (index 6) in the Sequential block
            {
                'params': model.classifier[6].parameters(),
                'lr': 10 * args.lr
            }
        ],
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.decay)  # if not specified, the default lr is used

    tracker = Tracker()

    for i in range(args.epochs):
        train(model, optimizer, source_train_loader, target_train_loader,
              tracker, args, i)
        evaluate(model, source_evaluate_loader, 'source', tracker, args, i)
        evaluate(model, target_evaluate_loader, 'target', tracker, args, i)

    # Save logged classification loss, coral loss, source accuracy, target accuracy
    torch.save(tracker.to_dict(), args.da_loss + "_log.pth")
    print("Final Evaluation\r")
    return evaluate(model, target_evaluate_loader, 'target', tracker, args, i)
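
# Hedged sketch: `train` and `evaluate` are called throughout these examples
# but never defined. One plausible shape of the per-epoch training step,
# reusing the `coral_loss` sketch from Example #1 (illustrative only; the real
# version presumably also logs both loss terms into `tracker`):
def train(model, optimizer, source_loader, target_loader, tracker, args, epoch):
    import torch.nn.functional as F
    model.train()
    for (xs, ys), (xt, _) in zip(source_loader, target_loader):
        xs, ys, xt = xs.to(args.device), ys.to(args.device), xt.to(args.device)
        out_source = model(xs)
        out_target = model(xt)
        classification_loss = F.cross_entropy(out_source, ys)
        da_loss = coral_loss(out_source, out_target)
        loss = classification_loss + args.lambda_coral * da_loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()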
Example #5
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    # Load the model through `infer_network`
    infer_network.load_model(args.model, device=args.device)

    # Create a flag for single images
    image_flag = False
    # Check if the input is a webcam
    if args.input == 'CAM':
        args.input = 0
    elif args.input.endswith(('.jpg', '.bmp', '.png')):
        image_flag = True
    # If the input file is not a video, stop the program
    elif not args.input.endswith(('.mp4', '.avi')):
        sys.exit(
            f"The format of the input file '{args.input.endswith}' is not supported."
        )

    # Handle the input stream
    cap = cv2.VideoCapture(args.input)
    cap.open(args.input)

    # Grab the shape of the input and the frame rate
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    if not image_flag:
        # Create a video writer for the output video
        # The second argument should be `cv2.VideoWriter_fourcc('M','J','P','G')`
        # on Mac, and `0x00000021` on Linux
        out = cv2.VideoWriter('out.mp4',
                              cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps,
                              (width, height))
        min_frame_count = 3  # minimum number of consecutive frames a pedestrian must be detected in
    else:
        out = None
        min_frame_count = 0  # minimum number of consecutive frames a pedestrian must be detected in

    # Initialize the lists of tracked pedestrians
    list_tracked_pedestrians = []
    list_trackers = []  # List of all trackers
    set_id_pedestrians = set()  # Set of all the pedestrians in total
    previous_count = 0

    # Loop until the stream is over
    while cap.isOpened():
        # Read from the video capture
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Pre-process the image as needed
        net_input_shape = infer_network.get_input_shape()
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        # Start asynchronous inference for the specified request
        infer_network.async_inference(p_frame)

        # Wait for the result
        if infer_network.wait() == 0:
            # Get the results of the inference request
            result = infer_network.get_output()

            # Detect the objects in the new frame
            list_detections = infer_network.postprocess_output(
                result, width, height, args.prob_threshold)

            # Update the position of the tracked pedestrians
            list_trackers, list_detections, list_trackers_removed = updateTrackers(
                list_trackers, list_detections)

            # Add the remaining detections as new tracked pedestrians
            for detection in list_detections:
                x, y, w, h = detection
                list_trackers.append(Tracker(x, y, w, h))

            # Get the list of detected pedestrians (trackers detected in more than min_frame_count)
            list_tracked_pedestrians = [
                tracker for tracker in list_trackers
                if len(tracker.list_centroids) >= min_frame_count
            ]

            # Draw all the tracked pedestrians in the current frame
            for pedestrian in list_tracked_pedestrians:
                pedestrian.drawOnFrame(frame)

            # --- Extract any desired stats from the results ---

            # Update the list of total pedestrians
            set_id_pedestrians = set_id_pedestrians.union(
                set([p.id for p in list_tracked_pedestrians]))

            # Number of pedestrians in the current frame
            current_count = len(list_tracked_pedestrians)

            # Total of pedestrians detected since the beginning of the video
            total_count = len(set_id_pedestrians)

            # Publish the results in the person topic
            if current_count != previous_count:
                previous_count = current_count
                client.publish(
                    "person",
                    json.dumps({
                        "count": current_count,
                        "total": total_count
                    }))

            # Report how long a person stayed in the frame once they leave it
            duration_min = 10  # minimum number of frames a tracker must exist for its duration to count

            if list_trackers_removed:
                list_duration = [
                    len(p.list_centroids) / fps
                    for p in list_trackers_removed
                    if len(p.list_centroids) > duration_min
                ]
                if list_duration:
                    duration = mean(list_duration)
                    client.publish("person/duration",
                                   json.dumps({"duration": duration}))

            # Send frame to the ffmpeg server
            sys.stdout.buffer.write(frame)
            sys.stdout.flush()

            # Write out the frame
            if image_flag:
                cv2.imwrite('output_image.jpg', frame)
            else:
                # cv2.putText(frame, f"{current_cout} | {total_count}", (15, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), thickness = 1)
                out.write(frame)

        # Break if escape key pressed
        if key_pressed == 27:
            break

    # Release the out writer, capture, and destroy any OpenCV windows
    if not image_flag:
        out.release()
    cap.release()
    cv2.destroyAllWindows()
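
# Hedged sketch: `updateTrackers` is referenced above but not defined. A
# greedy centroid-matching version consistent with its call site (the real
# implementation may differ, e.g. in how Tracker stores its state):
def updateTrackers(trackers, detections, max_distance=50.0):
    import math
    remaining = list(detections)
    kept, removed = [], []
    for tracker in trackers:
        tx, ty = tracker.list_centroids[-1]  # assumes centroids are (cx, cy)
        best_idx, best_dist = None, max_distance
        for i, (x, y, w, h) in enumerate(remaining):
            dist = math.hypot(x + w / 2 - tx, y + h / 2 - ty)
            if dist < best_dist:
                best_idx, best_dist = i, dist
        if best_idx is not None:
            x, y, w, h = remaining.pop(best_idx)
            tracker.list_centroids.append((x + w / 2, y + h / 2))
            kept.append(tracker)
        else:
            # Unmatched trackers are reported as having left the frame.
            removed.append(tracker)
    return kept, remaining, removed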
Example #6
def main():

    # Paper: In the training phase, we set the batch size to 128,
    # base learning rate to 10^-3, weight decay to 5×10^-4, and momentum to 0.9

    parser = argparse.ArgumentParser(
        description='Train - Evaluate DeepCORAL model')
    parser.add_argument('--disable_cuda',
                        action='store_true',
                        help='Disable CUDA')
    parser.add_argument('--epochs',
                        type=int,
                        default=50,
                        help='Number of total epochs to run')
    parser.add_argument('--batch_size',
                        type=int,
                        default=128,
                        help='Batch size')
    parser.add_argument('--lr', type=float, default=1e-3, help='Learning Rate')
    parser.add_argument('--decay',
                        type=float,
                        default=5e-4,
                        help='Weight decay (L2 penalty)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.9,
                        help="Optimizer's momentum")
    parser.add_argument('--lambda_coral',
                        type=float,
                        default=0.5,
                        help="Weight that trades off the adaptation with "
                        "classification accuracy on the source domain")
    parser.add_argument('--source',
                        default='amazon',
                        help="Source Domain (dataset)")
    parser.add_argument('--target',
                        default='webcam',
                        help="Target Domain (dataset)")

    args = parser.parse_args()
    args.device = None

    if not args.disable_cuda and torch.cuda.is_available():
        args.device = torch.device('cuda')
    else:
        args.device = torch.device('cpu')

    source_train_loader = get_loader(name_dataset=args.source,
                                     batch_size=args.batch_size,
                                     train=True)
    target_train_loader = get_loader(name_dataset=args.target,
                                     batch_size=args.batch_size,
                                     train=True)

    source_evaluate_loader = get_loader(name_dataset=args.source,
                                        batch_size=args.batch_size,
                                        train=False)
    target_evaluate_loader = get_loader(name_dataset=args.target,
                                        batch_size=args.batch_size,
                                        train=False)

    n_classes = len(source_train_loader.dataset.classes)

    # ~ Paper : "We initialized the other layers with the parameters pre-trained on ImageNet"
    # check https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py
    model = alexnet(pretrained=True)
    # ~ Paper : The dimension of last fully connected layer (fc8) was set to the number of categories (31)
    model.classifier[6] = nn.Linear(4096, n_classes)
    # ~ Paper : and initialized with N(0, 0.005)
    torch.nn.init.normal_(model.classifier[6].weight, mean=0, std=5e-3)

    # Initialize bias to small constant number (http://cs231n.github.io/neural-networks-2/#init)
    model.classifier[6].bias.data.fill_(0.01)

    model = model.to(device=args.device)

    # ~ Paper : "The learning rate of fc8 is set to 10 times the other layers as it was training from scratch."
    optimizer = torch.optim.SGD(
        [
            {
                'params': model.features.parameters()
            },
            {
                'params': model.classifier[:6].parameters()
            },
            # fc8 -> 7th element (index 6) in the Sequential block
            {
                'params': model.classifier[6].parameters(),
                'lr': 10 * args.lr
            }
        ],
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.decay)  # if not specified, the default lr is used

    tracker = Tracker()

    for i in range(args.epochs):
        train(model, optimizer, source_train_loader, target_train_loader,
              tracker, args, i)
        evaluate(model, source_evaluate_loader, 'source', tracker, args, i)
        evaluate(model, target_evaluate_loader, 'target', tracker, args, i)

    # Save logged classification loss, coral loss, source accuracy, target accuracy
    torch.save(tracker.to_dict(), "log.pth")
Example #7
def track_keypoints(args):
    print("Tracking sekd keypoints with args: {0}. \n".format(args))

    print("Init feature extractor using SEKD.")
    feature_extractor = nets.get_sekd_model(
        args.model_name,
        weights_path=args.weights_path,
        confidence_threshold=args.conf_thresh,
        nms_radius=args.nms_radius,
        max_keypoints=args.max_keypoints,
        cuda=args.cuda,
        multi_scale=args.multi_scale,
        sub_pixel_location=args.sub_pixel_location)

    print("Init video stream from {0}.".format(args.input))
    video_stream = Video(args.input, args.camera_id, args.img_ext)

    print("Init tracker.")
    tracker = Tracker(args.max_length)

    # Create a window to display the result.
    if not args.no_display:
        window = 'SEKD Tracker'
        cv2.namedWindow(window)
    else:
        print('Do not show the results via GUI window.')

    # Create output directory if desired.
    if args.save_keypoints:
        print('Will save keypoints to {0}.'.format(args.keypoints_dir))
        if not os.path.exists(args.keypoints_dir):
            os.makedirs(args.keypoints_dir)
    if args.save_tracks:
        print('Will save tracks to {0}.'.format(args.tracks_dir))
        if not os.path.exists(args.tracks_dir):
            os.makedirs(args.tracks_dir)

    print('Processing each frame ...')
    while True:
        # Get a new image.
        img = video_stream.next_frame()
        if img is None:
            print('All frames have been processed.')
            if not args.no_display:
                print('Press any key to quit.')
                cv2.waitKey(0)
                cv2.destroyAllWindows()
            break

        # Resize img.
        if img.shape[0] > args.max_height or img.shape[1] > args.max_width:
            resize_ratio = min(args.max_height / img.shape[0],
                               args.max_width / img.shape[1])
            img = cv2.resize(img, (int(resize_ratio * img.shape[1]),
                                   int(resize_ratio * img.shape[0])))

        # Get points and descriptors.
        keypoints, descriptors = feature_extractor.detectAndCompute(img)

        # Save points and descriptors.
        if args.save_keypoints:
            img_name = video_stream.name_list[video_stream.i - 1]
            keypoints_filepath = os.path.join(args.keypoints_dir, img_name)
            print('Save keypoints to {0}'.format(keypoints_filepath))
            np.savez(keypoints_filepath,
                     keypoints=keypoints,
                     descriptors=descriptors)

        # Update tracks with the keypoints and descriptors.
        tracker.track(keypoints, descriptors)

        # Draw keypoint tracks on the input image.
        img_out = (img * 255.).astype('uint8')
        img_out = tracker.draw_tracks(img_out)

        # Save tracks.
        if args.save_tracks:
            # Name saved track images by zero-padded frame index
            img_name = str(video_stream.i - 1).zfill(5) + '.png'
            tracks_filepath = os.path.join(args.tracks_dir, img_name)
            print('Save tracks to {0}'.format(tracks_filepath))
            cv2.imwrite(tracks_filepath, img_out)

            tracks_filepath = os.path.join(args.tracks_dir, img_name[:-4])
            np.savez(tracks_filepath,
                     tracks_backward=tracker.tracks_backward[-1])

        # Display visualization image to screen.
        if not args.no_display:
            cv2.imshow(window, img_out)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                print('\'q\' has been pressed, quitting ...')
                cv2.destroyAllWindows()
                break

    print('Finished tracking keypoints.')
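
# Hedged sketch: the core of `tracker.track(keypoints, descriptors)` is
# typically mutual nearest-neighbour descriptor matching between consecutive
# frames. A minimal version (assumes L2-normalized descriptors of shape
# (dim, n), SuperPoint-style; SEKD's tracker adds track bookkeeping on top):
def match_descriptors(desc_prev, desc_curr, max_dist=0.7):
    import numpy as np
    if desc_prev.shape[1] == 0 or desc_curr.shape[1] == 0:
        return np.zeros((0, 2), dtype=int)
    # For unit vectors, squared L2 distance is 2 - 2 * cosine similarity.
    dist = np.sqrt(np.clip(2 - 2 * desc_prev.T @ desc_curr, 0, None))
    nn_curr = dist.argmin(axis=1)  # best current match per previous keypoint
    nn_prev = dist.argmin(axis=0)  # best previous match per current keypoint
    matches = [(i, j) for i, j in enumerate(nn_curr)
               if nn_prev[j] == i and dist[i, j] < max_dist]
    return np.array(matches, dtype=int).reshape(-1, 2)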