Example #1
0
def main(args):

    # print args recap
    print(args, end="\n\n")

    # do not remove this line
    start = time.time()

    # Create the dataset object for the "ni", "multi-task-nc" or "nic" track,
    # assuming the CORe50 data is located in ./core50/data/
    dataset = CORE50(root='core50/data/',
                     scenario=args.scenario,
                     preload=args.preload_data)

    # Get the validation set
    print("Recovering validation set...")
    full_valdidset = dataset.get_full_valid_set()

    # model
    if args.classifier == 'ResNet18':
        classifier = models.resnet18(pretrained=True)
        classifier.fc = torch.nn.Linear(512, args.n_classes)

    opt = torch.optim.SGD(classifier.parameters(), lr=args.lr)
    criterion = torch.nn.CrossEntropyLoss()

    # vars to update over time
    valid_acc = []
    ext_mem_sz = []
    ram_usage = []
    heads = []
    ext_mem = None

    # loop over the training incremental batches (x, y, t)
    for i, train_batch in enumerate(dataset):
        train_x, train_y, t = train_batch

        # select a random subset of the current batch to store in external
        # memory, then add the stored replay patterns to the current batch
        idxs_cur = np.random.choice(train_x.shape[0],
                                    args.replay_examples,
                                    replace=False)

        if i == 0:
            ext_mem = [train_x[idxs_cur], train_y[idxs_cur]]
        else:
            ext_mem = [
                np.concatenate((train_x[idxs_cur], ext_mem[0])),
                np.concatenate((train_y[idxs_cur], ext_mem[1]))
            ]

        train_x = np.concatenate((train_x, ext_mem[0]))
        train_y = np.concatenate((train_y, ext_mem[1]))

        print("----------- batch {0} -------------".format(i))
        print("x shape: {0}, y shape: {1}".format(train_x.shape,
                                                  train_y.shape))
        print("Task Label: ", t)

        # train the classifier on the current batch/task
        _, _, stats = train_net(opt,
                                classifier,
                                criterion,
                                args.batch_size,
                                train_x,
                                train_y,
                                t,
                                args.epochs,
                                preproc=preprocess_imgs)
        if args.scenario == "multi-task-nc":
            heads.append(copy.deepcopy(classifier.fc))

        # collect statistics
        ext_mem_sz += stats['disk']
        ram_usage += stats['ram']

        # test on the validation set
        stats, _ = test_multitask(classifier,
                                  full_valdidset,
                                  args.batch_size,
                                  preproc=preprocess_imgs,
                                  multi_heads=heads,
                                  last_layer_name="fc",
                                  verbose=False)

        valid_acc += stats['acc']
        print("------------------------------------------")
        print("Avg. acc: {}".format(stats['acc']))
        print("------------------------------------------")

    # Generate submission.zip
    # directory with the code snapshot to generate the results
    sub_dir = 'submissions/' + args.sub_dir
    if not os.path.exists(sub_dir):
        os.makedirs(sub_dir)

    # copy code
    create_code_snapshot(".", sub_dir + "/code_snapshot")

    # generate metadata.txt with all the data used to compute the CLScore
    elapsed = (time.time() - start) / 60
    print("Training Time: {}m".format(elapsed))
    with open(sub_dir + "/metadata.txt", "w") as wf:
        for obj in [
                np.average(valid_acc), elapsed,
                np.average(ram_usage),
                np.max(ram_usage),
                np.average(ext_mem_sz),
                np.max(ext_mem_sz)
        ]:
            wf.write(str(obj) + "\n")

    # test_preds.txt: with a list of labels separated by "\n"
    print("Final inference on test set...")
    full_testset = dataset.get_full_test_set()
    stats, preds = test_multitask(classifier,
                                  full_testset,
                                  args.batch_size,
                                  preproc=preprocess_imgs,
                                  multi_heads=heads,
                                  last_layer_name="fc",
                                  verbose=False)

    with open(sub_dir + "/test_preds.txt", "w") as wf:
        for pred in preds:
            wf.write(str(pred) + "\n")

    print("Experiment completed.")
Example #2
0
def main(args):

    # print args recap
    print(args, end="\n\n")

    # do not remove this line
    start = time.time()

    # Create the dataset object for the "ni", "multi-task-nc" or "nic" track,
    # assuming the CORe50 data is located in ./core50/data/.
    # Note: loading the full dataset takes a long time; for quick test runs,
    # use the reduced validation set below and the limit_size flag inside the
    # training loop to shorten the dataset and the runtime.
    dataset = CORE50(root='core50/data/',
                     scenario=args.scenario,
                     preload=args.preload_data)

    # Get the validation set
    print("Recovering validation set...")
    # default full validation set
    # full_valdidset = dataset.get_full_valid_set()
    # reduced validation set
    full_valdidset = dataset.get_full_valid_set(reduced=True)

    # model
    if args.classifier == 'ResNet18':
        # start from an ImageNet-pretrained ResNet-18
        classifier = models.resnet18(pretrained=True)
        # replace the final fully connected layer: 512 input features,
        # args.n_classes output features (50 classes for CORe50)
        classifier.fc = torch.nn.Linear(512, args.n_classes)

    # stochastic gradient descent optimizer
    opt = torch.optim.SGD(classifier.parameters(), lr=args.lr)
    # cross-entropy loss (combines LogSoftmax and NLLLoss in one criterion)
    criterion = torch.nn.CrossEntropyLoss()

    # vars to update over time
    valid_acc = []
    ext_mem_sz = []
    ram_usage = []
    heads = []

    # Start Modification
    # EWC regularization strength: a higher ewc_lambda penalizes changes to
    # weights that were important for previous tasks more strongly (less
    # forgetting, less plasticity); a lower value lets the new task dominate.
    # Other examples use values such as 0.01, 0.4 or 0.8; treat it as a
    # hyperparameter to tune.
    ewc_lambda = 4
    # fisher_max = 0.0001
    # dictionary holding the Fisher information values per task
    fisher_dict = {}
    # dictionary holding the optimized weight values after each previous task
    optpar_dict = {}
    # End Modification

    # loop over the training incremental batches (x, y, t)
    for i, train_batch in enumerate(dataset):
        train_x, train_y, t = train_batch

        # Start Modification

        # Optionally truncate train_x and train_y for quick testing here
        limit_size = False  # set to True to cap the training set size
        if limit_size:
            train_size = 3200
            # train_size = 11900
            train_x = train_x[0:train_size]
            train_y = train_y[0:train_size]

        # End Modification

        # Print current batch number
        print("----------- batch {0} -------------".format(i))
        # Print current batch shape
        print("x shape: {0}, y shape: {1}".format(train_x.shape,
                                                  train_y.shape))
        # print task label type
        print("Task Label: ", t)

        # train_net_ewc: custom training function that adds the EWC penalty to the loss; returns stats
        _, _, stats = train_net_ewc(opt,
                                    classifier,
                                    criterion,
                                    args.batch_size,
                                    train_x,
                                    train_y,
                                    t,
                                    fisher_dict,
                                    optpar_dict,
                                    ewc_lambda,
                                    args.epochs,
                                    preproc=preprocess_imgs)

        # for multi-task-nc: keep a deep copy of the task-specific head (fc layer)
        if args.scenario == "multi-task-nc":
            heads.append(copy.deepcopy(classifier.fc))
        ext_mem_sz += stats['disk']
        ram_usage += stats['ram']

        # Start Modification
        # After training on this task, compute the Fisher information values
        # and store the optimized parameters for the EWC penalty
        on_task_update(t,
                       train_x,
                       train_y,
                       fisher_dict,
                       optpar_dict,
                       classifier,
                       opt,
                       criterion,
                       args.batch_size,
                       preproc=preprocess_imgs)
        # End Modification

        # evaluate on the validation set, using the stored heads when applicable
        stats, _ = test_multitask(classifier,
                                  full_valdidset,
                                  args.batch_size,
                                  preproc=preprocess_imgs,
                                  multi_heads=heads)

        # print new stats on performance
        valid_acc += stats['acc']
        print("------------------------------------------")
        print("Avg. acc: {}".format(stats['acc']))
        print("------------------------------------------")

    # Generate submission.zip
    # directory with the code snapshot to generate the results
    sub_dir = 'submissions/' + args.sub_dir
    if not os.path.exists(sub_dir):
        os.makedirs(sub_dir)

    # copy code
    # custom function in utils folder to deal with possible file path issues
    create_code_snapshot(".", sub_dir + "/code_snapshot")

    # generate metadata.txt with all the data used to compute the CLScore
    elapsed = (time.time() - start) / 60
    print("Training Time: {}m".format(elapsed))
    with open(sub_dir + "/metadata.txt", "w") as wf:
        for obj in [
                np.average(valid_acc), elapsed,
                np.average(ram_usage),
                np.max(ram_usage),
                np.average(ext_mem_sz),
                np.max(ext_mem_sz)
        ]:
            wf.write(str(obj) + "\n")

    # run final full test
    # test_preds.txt: with a list of labels separated by "\n"
    print("Final inference on test set...")
    full_testset = dataset.get_full_test_set()
    stats, preds = test_multitask(classifier,
                                  full_testset,
                                  args.batch_size,
                                  preproc=preprocess_imgs)

    with open(sub_dir + "/test_preds.txt", "w") as wf:
        for pred in preds:
            wf.write(str(pred) + "\n")

    print("Experiment completed.")
Example #3
0
def main(args):

    # print args recap
    print(args, end="\n\n")

    # do not remove this line
    start = time.time()

    # Create the dataset object for the "ni", "multi-task-nc" or "nic" track,
    # assuming the CORe50 data is located in ./core50/data/.
    # Note: loading the full dataset takes a long time; for quick test runs,
    # use the reduced validation set below to shorten the runtime.
    dataset = CORE50(root='core50/data/',
                     scenario=args.scenario,
                     preload=args.preload_data)

    # Get the validation set
    print("Recovering validation set...")
    # default full validation set
    # full_valdidset = dataset.get_full_valid_set()
    # reduced validation set
    full_valdidset = dataset.get_full_valid_set(reduced=True)

    # model
    if args.classifier == 'ResNet18':
        # start from an ImageNet-pretrained ResNet-18
        classifier = models.resnet18(pretrained=True)
        # replace the final fully connected layer: 512 input features,
        # args.n_classes output features (50 classes for CORe50)
        classifier.fc = torch.nn.Linear(512, args.n_classes)

    # stochastic gradient descent optimizer
    opt = torch.optim.SGD(classifier.parameters(), lr=args.lr)
    # cross-entropy loss (combines LogSoftmax and NLLLoss in one criterion)
    criterion = torch.nn.CrossEntropyLoss()

    # vars to update over time
    valid_acc = []
    ext_mem_sz = []
    ram_usage = []
    heads = []

    # enumerate(dataset) provides an iterator over the incremental training batches
    # loop over the training incremental batches (x, y, t)
    for i, train_batch in enumerate(dataset):
        train_x, train_y, t = train_batch

        # Start modification

        # run batch 0 and 1. Then break.
        # if i == 2: break

        # shuffle new data
        train_x, train_y = shuffle_in_unison((train_x, train_y), seed=0)

        if i == 0:
            # this is the first round
            # store data for later
            all_x = train_x[0:train_x.shape[0] // 2]
            all_y = train_y[0:train_y.shape[0] // 2]
        else:
            # this is not the first round
            # create hybrid training set old and new data
            # shuffle old data
            all_x, all_y = shuffle_in_unison((all_x, all_y), seed=0)

            # create temp holder
            temp_x = train_x
            temp_y = train_y

            # set current variables to be used for training
            train_x = np.append(all_x, train_x, axis=0)
            train_y = np.append(all_y, train_y)
            train_x, train_y = shuffle_in_unison((train_x, train_y), seed=0)

            # keep i/(i+1) of the old memory and 1/(i+1) of the new batch
            temp_x, temp_y = shuffle_in_unison((temp_x, temp_y), seed=0)
            keep_old = (all_x.shape[0] // (i + 1)) * i
            keep_new = temp_x.shape[0] // (i + 1)
            all_x = np.append(all_x[0:keep_old], temp_x[0:keep_new], axis=0)
            all_y = np.append(all_y[0:keep_old], temp_y[0:keep_new])
            del temp_x
            del temp_y

        # rest of code after this should be the same
        # End modification

        # Print current batch number
        print("----------- batch {0} -------------".format(i))
        # Print current batch shape
        print("x shape: {0}, y shape: {1}".format(train_x.shape,
                                                  train_y.shape))
        # print task label type
        print("Task Label: ", t)

        # train_net: custom utility function that trains the network; returns stats
        _, _, stats = train_net(opt,
                                classifier,
                                criterion,
                                args.batch_size,
                                train_x,
                                train_y,
                                t,
                                args.epochs,
                                preproc=preprocess_imgs)

        # for multi-task-nc: keep a deep copy of the task-specific head (fc layer)
        if args.scenario == "multi-task-nc":
            heads.append(copy.deepcopy(classifier.fc))
        ext_mem_sz += stats['disk']
        ram_usage += stats['ram']

        # evaluate on the validation set, using the stored heads when applicable
        stats, _ = test_multitask(classifier,
                                  full_valdidset,
                                  args.batch_size,
                                  preproc=preprocess_imgs,
                                  multi_heads=heads)

        # print new stats on performance
        valid_acc += stats['acc']
        print("------------------------------------------")
        print("Avg. acc: {}".format(stats['acc']))
        print("------------------------------------------")

    # Generate submission.zip
    # directory with the code snapshot to generate the results
    sub_dir = 'submissions/' + args.sub_dir
    if not os.path.exists(sub_dir):
        os.makedirs(sub_dir)

    # copy code
    # custom function in utils folder to deal with possible file path issues
    create_code_snapshot(".", sub_dir + "/code_snapshot")

    # generate metadata.txt with all the data used to compute the CLScore
    elapsed = (time.time() - start) / 60
    print("Training Time: {}m".format(elapsed))
    with open(sub_dir + "/metadata.txt", "w") as wf:
        for obj in [
                np.average(valid_acc), elapsed,
                np.average(ram_usage),
                np.max(ram_usage),
                np.average(ext_mem_sz),
                np.max(ext_mem_sz)
        ]:
            wf.write(str(obj) + "\n")

    # run final full test
    # test_preds.txt: with a list of labels separated by "\n"
    print("Final inference on test set...")
    full_testset = dataset.get_full_test_set()
    stats, preds = test_multitask(classifier,
                                  full_testset,
                                  args.batch_size,
                                  preproc=preprocess_imgs)

    with open(sub_dir + "/test_preds.txt", "w") as wf:
        for pred in preds:
            wf.write(str(pred) + "\n")

    print("Experiment completed.")
Example #4
0
    def train_model(self, tune=True, resize=False):
        #GE
        #print('Start training')
        #GE
        start = time.time()
        # vars to update over time
        valid_acc = []
        ext_mem_sz = []
        ram_usage = []
        heads = []
        num_tasks = self.dataset.nbatch[self.dataset.scenario]
        # loop over the training incremental batches (x, y, t)
        for i, train_batch in enumerate(self.dataset):
            train_x_raw, train_y_raw, t = train_batch
            train_x_raw = train_x_raw.astype(np.uint8)
            train_y_raw = train_y_raw.astype(np.uint8)
            del train_batch
            if self.args.verbose:
                print("----------- batch {0} -------------".format(i))
            for replay_epoch in range(self.args.replay_epochs):
                if self.args.verbose:
                    print("replay epoch {} for batch {}".format(
                        replay_epoch, i))
                if i > 0:
                    mem_imgs, mem_labels = self.buffer.get_mem(
                        self.replay_used)
                    train_x = np.concatenate((train_x_raw, mem_imgs))
                    train_y = np.concatenate((train_y_raw, mem_labels))
                else:
                    train_x = train_x_raw
                    train_y = train_y_raw

                if self.args.verbose:
                    print("x shape: {0}, y shape: {1}".format(
                        train_x.shape, train_y.shape))

                # train the classifier on the current batch/task
                if self.args.aug:
                    _, _, stats = train_net_aug(self.optimizer,
                                                self.model,
                                                self.loss,
                                                self.args.batch_size,
                                                train_x,
                                                train_y,
                                                i,
                                                self.args.epochs,
                                                self.train_aug,
                                                use_cuda=self.use_cuda,
                                                verbose=self.args.verbose)
                else:
                    _, _, stats = train_net(self.optimizer,
                                            self.model,
                                            self.loss,
                                            self.args.batch_size,
                                            train_x,
                                            train_y,
                                            i,
                                            self.args.epochs,
                                            preproc=preprocess_imgs,
                                            use_cuda=self.use_cuda,
                                            resize=resize)
                # collect statistics
                ext_mem_sz += stats['disk']
                ram_usage += stats['ram']
                if self.args.verbose and self.args.scenario != 'nic':
                    print(ram_usage)

            if self.args.scenario == "multi-task-nc":
                heads.append(copy.deepcopy(self.model.fc))

            # store the current raw batch in the replay buffer for future replay
            self.buffer.update_buffer(train_x_raw, train_y_raw, i)

            # test on the validation set
            if self.args.aug:
                stats, _ = test_multitask_aug(self.model,
                                              self.full_valdidset,
                                              self.test_bz,
                                              self.test_aug,
                                              verbose=self.args.verbose,
                                              use_cuda=self.use_cuda)

            else:
                stats, _ = test_multitask(self.model,
                                          self.full_valdidset,
                                          self.args.batch_size,
                                          preproc=preprocess_imgs,
                                          multi_heads=heads,
                                          verbose=self.args.verbose,
                                          use_cuda=self.use_cuda,
                                          resize=resize)
            if self.args.verbose:
                print("------------------------------------------")
                print("Avg. acc:", stats['acc'])
                print("------------------------------------------")
            #GE
            print("------------------------------------------")
            print("Batch validation accuracy: {:.3f}".format(stats['acc'][0]))
            print("------------------------------------------")
            #GE
            valid_acc += stats['acc']

            # final review
            if i == num_tasks - 1:
                for replay_epoch in range(self.args.review_epoch):
                    for g in self.optimizer.param_groups:
                        g['lr'] = self.args.review_lr_factor * self.args.lr
                    train_x, train_y = self.buffer.get_mem(self.review_size)
                    if self.args.aug:
                        _, _, stats = train_net_aug(self.optimizer,
                                                    self.model,
                                                    self.loss,
                                                    self.args.batch_size,
                                                    train_x,
                                                    train_y,
                                                    i,
                                                    self.args.epochs,
                                                    self.train_aug,
                                                    use_cuda=self.use_cuda)
                        stats, _ = test_multitask_aug(
                            self.model,
                            self.full_valdidset,
                            self.test_bz,
                            self.test_aug,
                            verbose=self.args.verbose,
                            use_cuda=self.use_cuda)
                    else:
                        _, _, stats = train_net(self.optimizer,
                                                self.model,
                                                self.loss,
                                                self.args.batch_size,
                                                train_x,
                                                train_y,
                                                i,
                                                self.args.epochs,
                                                preproc=preprocess_imgs,
                                                use_cuda=self.use_cuda,
                                                resize=resize)
                        stats, _ = test_multitask(self.model,
                                                  self.full_valdidset,
                                                  self.args.batch_size,
                                                  preproc=preprocess_imgs,
                                                  multi_heads=heads,
                                                  verbose=self.args.verbose,
                                                  use_cuda=self.use_cuda,
                                                  resize=resize)
                    if self.args.verbose:
                        print("------------------------------------------")
                        print("Review Avg. acc:", stats['acc'])
                        print("------------------------------------------")

        final_val = stats['acc']
        # clear mem
        del self.buffer, self.full_valdidset
        full_testset = self.dataset.get_full_test_set()

        # generate metadata.txt with all the data used to compute the CLScore
        elapsed = (time.time() - start) / 60
        print("Training Time: {:3f} min".format(elapsed))
        if not tune:
            # test_preds.txt: with a list of labels separated by "\n"
            print("Generating final predictions on test set...")
            if self.args.aug:
                stats, preds = test_multitask_aug(
                    self.model,
                    full_testset,
                    self.test_bz,
                    self.test_aug,
                    verbose=self.args.verbose,
                    use_cuda=self.use_cuda,
                    #GE
                    no_output=True
                    #GE
                )

            else:
                stats, preds = test_multitask(self.model,
                                              full_testset,
                                              self.args.batch_size,
                                              preproc=preprocess_imgs,
                                              multi_heads=heads,
                                              verbose=self.args.verbose,
                                              use_cuda=self.use_cuda,
                                              resize=resize)
            #elapsed = (time.time() - start) / 60
            #print("Total Time: {:3f} min".format(elapsed))
            #GE
            print('Run completed')
            #print('Full test set accuracy: {:.3f}'.format(stats['acc'][0]))
            #GE
            return valid_acc, elapsed, ram_usage, ext_mem_sz, preds
        else:
            return final_val[0], np.average(valid_acc), elapsed, np.average(
                ram_usage), np.max(ram_usage)
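
Example #4 keeps replay data in a self.buffer object with an update_buffer/get_mem interface, but the buffer class itself is not shown. The sketch below is a hypothetical, minimal buffer with that interface, keeping a random subset of every incoming batch; the real buffer in Example #4 may use a different retention policy and memory bound.

import numpy as np

class SimpleReplayBuffer:
    # minimal replay buffer with the update_buffer/get_mem interface used above
    def __init__(self, keep_per_batch=1000, seed=0):
        self.keep_per_batch = keep_per_batch
        self.rng = np.random.default_rng(seed)
        self.mem = None  # [images, labels]

    def update_buffer(self, x, y, batch_id):
        # batch_id is accepted for interface compatibility but unused here
        n = min(self.keep_per_batch, x.shape[0])
        idxs = self.rng.choice(x.shape[0], n, replace=False)
        if self.mem is None:
            self.mem = [x[idxs], y[idxs]]
        else:
            self.mem = [np.concatenate((self.mem[0], x[idxs])),
                        np.concatenate((self.mem[1], y[idxs]))]

    def get_mem(self, n):
        # return a random sample of up to n stored (image, label) pairs
        n = min(n, self.mem[1].shape[0])
        idxs = self.rng.choice(self.mem[1].shape[0], n, replace=False)
        return self.mem[0][idxs], self.mem[1][idxs]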
Example #5
0
    def train_model(self, tune=True, resize=False):
        start = time.time()
        # vars to update over time
        valid_acc = []
        ext_mem_sz = []
        ram_usage = []
        heads = []
        # loop over the training incremental batches (x, y, t)
        for i, train_batch in enumerate(self.dataset):
            train_x, train_y, _ = train_batch

            print("----------- batch {0} -------------".format(i))
            print("x shape: {0}, y shape: {1}".format(train_x.shape,
                                                      train_y.shape))

            # train the classifier on the current batch/task
            _, _, stats = self._train_step(optimizer=self.optimizer,
                                           model=self.model,
                                           criterion=self.loss,
                                           batch_size=self.args.batch_size,
                                           x=train_x,
                                           y=train_y,
                                           t=i,
                                           epochs=self.args.epochs,
                                           preproc=preprocess_imgs,
                                           use_cuda=self.use_cuda,
                                           resize=resize)

            ext_mem_sz += stats['disk']
            ram_usage += stats['ram']
            #print(ram_usage)
            if self.args.scenario == "multi-task-nc":
                heads.append(copy.deepcopy(self.model.fc))
            # test on the validation set
            stats, _ = test_multitask(self.model,
                                      self.full_valdidset,
                                      mb_size=64,
                                      preproc=preprocess_imgs,
                                      multi_heads=heads,
                                      verbose=True,
                                      use_cuda=self.use_cuda,
                                      resize=resize)
            train_x = preprocess_imgs(train_x)
            self.buffer.update_buffer(train_x, train_y, i)
            print(self.buffer.mem[0].shape)
            del train_x, train_y, train_batch
            valid_acc += stats['acc']
            print("------------------------------------------")
            print("Avg. acc: {}".format(stats['acc']))
            print("------------------------------------------")

        # final review

        if self.args.review_epoch == -1:
            mem_size = self.buffer.mem[1].shape[0]
            review_size = 15000
            if mem_size % review_size == 0:
                num_it = mem_size // review_size
            else:
                num_it = mem_size // review_size + 1
            for it in range(num_it):
                # use start_idx/end_idx so the timer variable "start" set at
                # the top of train_model is not overwritten
                start_idx = it * review_size
                end_idx = (it + 1) * review_size
                train_x = self.buffer.mem[0][start_idx:end_idx]
                train_y = self.buffer.mem[1][start_idx:end_idx]
                _, _, stats = train_net(self.optimizer,
                                        self.model,
                                        self.loss,
                                        self.args.batch_size,
                                        train_x,
                                        train_y,
                                        0,
                                        1,
                                        preproc=None,
                                        use_cuda=self.use_cuda,
                                        resize=resize)
                stats, _ = test_multitask(self.model,
                                          self.full_valdidset,
                                          mb_size=64,
                                          preproc=preprocess_imgs,
                                          multi_heads=heads,
                                          verbose=True,
                                          use_cuda=self.use_cuda,
                                          resize=resize)
                print("------------------------------------------")
                print("review Avg. acc: {}".format(stats['acc']))
                print("------------------------------------------")

        else:
            for replay_epoch in range(self.args.review_epoch):
                idxs_use = np.random.choice(self.buffer.mem[1].shape[0],
                                            self.args.review_size,
                                            replace=False)
                train_x = self.buffer.mem[0][idxs_use]
                train_y = self.buffer.mem[1][idxs_use]
                _, _, stats = train_net(self.optimizer,
                                        self.model,
                                        self.loss,
                                        self.args.batch_size,
                                        train_x,
                                        train_y,
                                        0,
                                        1,
                                        preproc=None,
                                        use_cuda=self.use_cuda,
                                        resize=resize)
                stats, _ = test_multitask(self.model,
                                          self.full_valdidset,
                                          mb_size=64,
                                          preproc=preprocess_imgs,
                                          multi_heads=heads,
                                          verbose=True,
                                          use_cuda=self.use_cuda,
                                          resize=resize)
                print("------------------------------------------")
                print("review Avg. acc: {}".format(stats['acc']))
                print("------------------------------------------")

        final_val = stats['acc']
        del self.full_valdidset, self.buffer
        elapsed = (time.time() - start) / 60
        print("Training Time: {:.3f} min".format(elapsed))
        full_testset = self.dataset.get_full_test_set()
        if not tune:
            # test_preds.txt: with a list of labels separated by "\n"
            print("Final inference on test set...")
            stats, preds = test_multitask(self.model,
                                          full_testset,
                                          mb_size=32,
                                          preproc=preprocess_imgs,
                                          multi_heads=heads,
                                          verbose=True,
                                          use_cuda=self.use_cuda,
                                          resize=resize)
            return valid_acc, elapsed, ram_usage, ext_mem_sz, preds
        else:
            return final_val[0], np.average(valid_acc), elapsed, np.average(
                ram_usage), np.max(ram_usage)
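
The final-review pass in Example #5 walks the whole buffer in fixed-size chunks, using the if/else above to obtain a ceiling division. A compact equivalent of that chunking logic (the buffer and chunk sizes here are illustrative):

mem_size, review_size = 47000, 15000                   # illustrative sizes
num_it = (mem_size + review_size - 1) // review_size   # ceiling division
for it in range(num_it):
    lo = it * review_size
    hi = min((it + 1) * review_size, mem_size)
    # each (lo, hi) pair indexes one review chunk, e.g. buffer.mem[0][lo:hi]
    print(it, hi - lo)                                  # chunk sizes: 15000, 15000, 15000, 2000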