Example #1
def run(args):

    # Set default arguments & check for incompatible options
    args.lr_gen = args.lr if args.lr_gen is None else args.lr_gen
    args.g_iters = args.iters if args.g_iters is None else args.g_iters
    args.g_fc_lay = args.fc_lay if args.g_fc_lay is None else args.g_fc_lay
    args.g_fc_uni = args.fc_units if args.g_fc_uni is None else args.g_fc_uni
    # -if [log_per_task], reset all logs
    if args.log_per_task:
        args.prec_log = args.iters
        args.loss_log = args.iters
        args.sample_log = args.iters
    # -if [iCaRL] is selected, select all accompanying options
    if hasattr(args, "icarl") and args.icarl:
        args.use_exemplars = True
        args.add_exemplars = True
        args.bce = True
        args.bce_distill = True
    # -if XdG is selected but not the Task-IL scenario, give error
    if (not args.scenario == "task") and args.xdg:
        raise ValueError("'XdG' is only compatible with the Task-IL scenario.")
    # -if EWC, SI or XdG is selected together with 'feedback', give error
    if args.feedback and (args.ewc or args.si or args.xdg or args.icarl):
        raise NotImplementedError(
            "EWC, SI, XdG and iCaRL are not supported with feedback connections."
        )
    # -if binary classification loss is selected together with 'feedback', give error
    if args.feedback and args.bce:
        raise NotImplementedError(
            "Binary classification loss not supported with feedback connections."
        )
    # -if XdG is selected together with both replay and EWC, give error (either one of them alone with XdG is fine)
    if args.xdg and (not args.replay == "none") and (args.ewc or args.si):
        raise NotImplementedError(
            "XdG is not supported with both '{}' replay and EWC / SI.".format(
                args.replay))
        #--> problem is that applying different task-masks interferes with gradient calculation
        #    (should be possible to overcome by calculating backward step on EWC/SI-loss also for each mask separately)
    # -if 'BCEdistill' is selected for other than scenario=="class", give error
    if args.bce_distill and not args.scenario == "class":
        raise ValueError(
            "BCE-distill can only be used for class-incremental learning.")
    # -create plots- and results-directories if needed
    if not os.path.isdir(args.r_dir):
        os.mkdir(args.r_dir)
    if args.pdf and not os.path.isdir(args.p_dir):
        os.mkdir(args.p_dir)

    scenario = args.scenario
    # If the Task-IL scenario is chosen with a single-headed output layer, the effective scenario becomes "domain"
    # (but note that when XdG is used, task-identity information is still used, so the actual scenario remains Task-IL)
    if args.singlehead and args.scenario == "task":
        scenario = "domain"

    # If only want param-stamp, get it printed to screen and exit
    if hasattr(args, "get_stamp") and args.get_stamp:
        print(get_param_stamp_from_args(args=args))
        exit()

    # Use cuda?
    cuda = torch.cuda.is_available() and args.cuda
    device = torch.device("cuda" if cuda else "cpu")

    # Set random seeds
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)
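    # (note: full determinism on GPU would additionally require configuring cuDNN,
    #  e.g. torch.backends.cudnn.deterministic = True)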

    #-------------------------------------------------------------------------------------------------#

    #----------------#
    #----- DATA -----#
    #----------------#

    # Prepare data for chosen experiment
    (train_datasets,
     test_datasets), config, classes_per_task = get_multitask_experiment(
         name=args.experiment,
         scenario=scenario,
         tasks=args.tasks,
         data_dir=args.d_dir,
         verbose=True,
         exception=True if args.seed == 0 else False,
     )

    #-------------------------------------------------------------------------------------------------#

    #------------------------------#
    #----- MODEL (CLASSIFIER) -----#
    #------------------------------#

    # Define main model (i.e., classifier; if requested, with feedback connections)
    if args.feedback:
        model = AutoEncoder(
            image_size=config['size'],
            image_channels=config['channels'],
            classes=config['classes'],
            fc_layers=args.fc_lay,
            fc_units=args.fc_units,
            z_dim=args.z_dim,
            fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False,
            fc_nl=args.fc_nl,
        ).to(device)
        model.lamda_pl = 1.  #--> to ensure that this VAE is also trained to classify
    else:
        model = Classifier(
            image_size=config['size'],
            image_channels=config['channels'],
            classes=config['classes'],
            fc_layers=args.fc_lay,
            fc_units=args.fc_units,
            fc_drop=args.fc_drop,
            fc_nl=args.fc_nl,
            fc_bn=True if args.fc_bn == "yes" else False,
            excit_buffer=True if args.xdg and args.gating_prop > 0 else False,
            binaryCE=args.bce,
            binaryCE_distill=args.bce_distill,
        ).to(device)

    # Define optimizer (only include parameters that require gradients)
    model.optim_list = [{
        'params':
        filter(lambda p: p.requires_grad, model.parameters()),
        'lr':
        args.lr
    }]
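    # (each dict in [optim_list] is a separate parameter-group; torch optimizers
    #  accept such a list directly, which allows e.g. per-group learning rates)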
    model.optim_type = args.optimizer
    if model.optim_type in ("adam", "adam_reset"):
        model.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999))
    elif model.optim_type == "sgd":
        model.optimizer = optim.SGD(model.optim_list)
    else:
        raise ValueError(
            "Unrecognized optimizer, '{}' is not currently a valid option".
            format(args.optimizer))

    #-------------------------------------------------------------------------------------------------#

    #----------------------------------#
    #----- CL-STRATEGY: EXEMPLARS -----#
    #----------------------------------#

    # Store in the model whether, how many, and in what way to store exemplars
    if isinstance(model, ExemplarHandler) and (args.use_exemplars
                                               or args.add_exemplars
                                               or args.replay == "exemplars"):
        model.memory_budget = args.budget
        model.norm_exemplars = args.norm_exemplars
        model.herding = args.herding
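        # (with herding, exemplars are presumably selected iCaRL-style, i.e. such that
        #  their mean approximates the class mean in feature space; otherwise randomly)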

    #-------------------------------------------------------------------------------------------------#

    #-----------------------------------#
    #----- CL-STRATEGY: ALLOCATION -----#
    #-----------------------------------#

    # Elastic Weight Consolidation (EWC)
    if isinstance(model, ContinualLearner):
        model.ewc_lambda = args.ewc_lambda if args.ewc else 0
        if args.ewc:
            model.fisher_n = args.fisher_n
            model.gamma = args.gamma
            model.online = args.online
            model.emp_FI = args.emp_fi

    # Synaptic Intelligence (SI)
    if isinstance(model, ContinualLearner):
        model.si_c = args.si_c if args.si else 0
        if args.si:
            model.epsilon = args.epsilon

    # XdG: create for every task a "mask" for each hidden fully connected layer
    if isinstance(model, ContinualLearner) and (args.xdg
                                                and args.gating_prop > 0):
        mask_dict = {}
        excit_buffer_list = []
        for task_id in range(args.tasks):
            mask_dict[task_id + 1] = {}
            for i in range(model.fcE.layers):
                layer = getattr(model.fcE, "fcLayer{}".format(i + 1)).linear
                if task_id == 0:
                    excit_buffer_list.append(layer.excit_buffer)
                n_units = len(layer.excit_buffer)
                gated_units = np.random.choice(n_units,
                                               size=int(args.gating_prop *
                                                        n_units),
                                               replace=False)
                mask_dict[task_id + 1][i] = gated_units
        model.mask_dict = mask_dict
        model.excit_buffer_list = excit_buffer_list
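        # (the stored [mask_dict] maps each task to the randomly selected units to
        #  gate per layer; presumably the [excit_buffer]s apply these masks by
        #  zeroing the selected units whenever their task is active)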

    #-------------------------------------------------------------------------------------------------#

    #-------------------------------#
    #----- CL-STRATEGY: REPLAY -----#
    #-------------------------------#

    # Use distillation loss (i.e., soft targets) for replayed data? (and set temperature)
    if isinstance(model, Replayer):
        model.replay_targets = "soft" if args.distill else "hard"
        model.KD_temp = args.temp
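        # (with "soft" targets, replayed inputs are matched to the previous model's
        #  temperature-scaled softmax outputs rather than to hard labels)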

    # If needed, specify separate model for the generator
    train_gen = (args.replay == "generative" and not args.feedback)
    if train_gen:
        # -specify architecture
        generator = AutoEncoder(
            image_size=config['size'],
            image_channels=config['channels'],
            fc_layers=args.g_fc_lay,
            fc_units=args.g_fc_uni,
            z_dim=args.g_z_dim,
            classes=config['classes'],
            fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False,
            fc_nl=args.fc_nl,
        ).to(device)
        # -set optimizer(s)
        generator.optim_list = [{
            'params':
            filter(lambda p: p.requires_grad, generator.parameters()),
            'lr':
            args.lr_gen
        }]
        generator.optim_type = args.optimizer
        if generator.optim_type in ("adam", "adam_reset"):
            generator.optimizer = optim.Adam(generator.optim_list,
                                             betas=(0.9, 0.999))
        elif generator.optim_type == "sgd":
            generator.optimizer = optim.SGD(generator.optim_list)
    else:
        generator = None
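    # (with feedback connections, the main model itself serves as the generator,
    #  so no separate generative model needs to be trained)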

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- REPORTING -----#
    #---------------------#

    # Get parameter-stamp (and print on screen)
    param_stamp = get_param_stamp(
        args,
        model.name,
        verbose=True,
        replay=True if (not args.replay == "none") else False,
        replay_model_name=generator.name if
        (args.replay == "generative" and not args.feedback) else None,
    )

    # Print some model-characteristics on the screen
    # -main model
    print("\n")
    utils.print_model_info(model, title="MAIN MODEL")
    # -generator
    if generator is not None:
        utils.print_model_info(generator, title="GENERATOR")

    # Prepare for plotting in visdom
    # -define [precision_dict] to keep track of performance during training for storing and for later plotting in pdf
    precision_dict = evaluate.initiate_precision_dict(args.tasks)
    precision_dict_exemplars = evaluate.initiate_precision_dict(
        args.tasks) if args.use_exemplars else None
    # -visdom-settings
    if args.visdom:
        env_name = "{exp}{tasks}-{scenario}".format(exp=args.experiment,
                                                    tasks=args.tasks,
                                                    scenario=args.scenario)
        graph_name = "{fb}{replay}{syn}{ewc}{xdg}{icarl}{bud}".format(
            fb="1M-" if args.feedback else "",
            replay="{}{}".format(args.replay, "D" if args.distill else ""),
            syn="-si{}".format(args.si_c) if args.si else "",
            ewc="-ewc{}{}".format(
                args.ewc_lambda, "-O{}".format(args.gamma)
                if args.online else "") if args.ewc else "",
            xdg="" if (not args.xdg) or args.gating_prop == 0 else
            "-XdG{}".format(args.gating_prop),
            icarl="-iCaRL" if (args.use_exemplars and args.add_exemplars
                               and args.bce and args.bce_distill) else "",
            bud="-bud{}".format(args.budget) if
            (args.use_exemplars or args.add_exemplars
             or args.replay == "exemplars") else "",
        )
        visdom = {'env': env_name, 'graph': graph_name}
        if args.use_exemplars:
            visdom_exemplars = {
                'env': env_name,
                'graph': "{}-EX".format(graph_name)
            }
    else:
        visdom = visdom_exemplars = None

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- CALLBACKS -----#
    #---------------------#

    # Callbacks for reporting on and visualizing loss
    generator_loss_cbs = [
        cb._VAE_loss_cb(
            log=args.loss_log,
            visdom=visdom,
            model=model if args.feedback else generator,
            tasks=args.tasks,
            iters_per_task=args.iters if args.feedback else args.g_iters,
            replay=False if args.replay == "none" else True)
    ] if (train_gen or args.feedback) else [None]
    solver_loss_cbs = [
        cb._solver_loss_cb(log=args.loss_log,
                           visdom=visdom,
                           model=model,
                           tasks=args.tasks,
                           iters_per_task=args.iters,
                           replay=False if args.replay == "none" else True)
    ] if (not args.feedback) else [None]
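    # (when not applicable, a list containing just [None] is passed; presumably
    #  [train_cl] skips callbacks that are None)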

    # Callbacks for evaluating and plotting generated / reconstructed samples
    sample_cbs = [
        cb._sample_cb(
            log=args.sample_log,
            visdom=visdom,
            config=config,
            test_datasets=test_datasets,
            sample_size=args.sample_n,
            iters_per_task=args.iters if args.feedback else args.g_iters)
    ] if (train_gen or args.feedback) else [None]

    # Callbacks for reporting and visualizing accuracy
    # -visdom (i.e., after each [prec_log])
    eval_cb = cb._eval_cb(
        log=args.prec_log,
        test_datasets=test_datasets,
        visdom=visdom,
        precision_dict=None,
        iters_per_task=args.iters,
        test_size=args.prec_n,
        classes_per_task=classes_per_task,
        scenario=scenario,
    )
    # -pdf / reporting: summary plots (i.e., only after each task)
    eval_cb_full = cb._eval_cb(
        log=args.iters,
        test_datasets=test_datasets,
        precision_dict=precision_dict,
        iters_per_task=args.iters,
        classes_per_task=classes_per_task,
        scenario=scenario,
    )
    # -with exemplars (both for visdom & reporting / pdf)
    eval_cb_exemplars = cb._eval_cb(
        log=args.iters,
        test_datasets=test_datasets,
        visdom=visdom_exemplars,
        classes_per_task=classes_per_task,
        precision_dict=precision_dict_exemplars,
        scenario=scenario,
        iters_per_task=args.iters,
        with_exemplars=True,
    ) if args.use_exemplars else None
    # -collect them in <lists>
    eval_cbs = [eval_cb, eval_cb_full]
    eval_cbs_exemplars = [eval_cb_exemplars]

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- TRAINING -----#
    #--------------------#

    print("--> Training:" + args.name)
    print("Total tasks:" + str(args.tasks_to_complete))
    # Keep track of training-time
    start = time.time()
    # Train model
    train_cl(
        args.tasks_to_complete,
        args.name,
        model,
        train_datasets,
        test_datasets,
        replay_mode=args.replay,
        scenario=scenario,
        classes_per_task=classes_per_task,
        iters=args.iters,
        batch_size=args.batch,
        generator=generator,
        gen_iters=args.g_iters,
        gen_loss_cbs=generator_loss_cbs,
        sample_cbs=sample_cbs,
        eval_cbs=eval_cbs,
        loss_cbs=generator_loss_cbs if args.feedback else solver_loss_cbs,
        eval_cbs_exemplars=eval_cbs_exemplars,
        use_exemplars=args.use_exemplars,
        add_exemplars=args.add_exemplars,
    )
    # Get total training-time in seconds, and write to file
    training_time = time.time() - start
    with open("{}/time-{}.txt".format(args.r_dir, param_stamp), 'w') as time_file:
        time_file.write('{}\n'.format(training_time))

    #-------------------------------------------------------------------------------------------------#

    #----------------------#
    #----- EVALUATION -----#
    #----------------------#

    print("\n\n--> Evaluation ({}-incremental learning scenario):".format(
        args.scenario))

    # Evaluate precision of final model on full test-set
    precs = [
        evaluate.validate(
            model,
            test_datasets[i],
            verbose=False,
            test_size=None,
            task=i + 1,
            with_exemplars=False,
            allowed_classes=list(
                range(classes_per_task * i, classes_per_task *
                      (i + 1))) if scenario == "task" else None)
        for i in range(args.tasks)
    ]
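    # (for Task-IL, predictions are restricted to each task's own classes through
    #  [allowed_classes]; in the other scenarios all classes compete)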
    print("\n Precision on test-set (softmax classification):")
    for i in range(args.tasks):
        print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
    average_precs = sum(precs) / args.tasks
    print('=> average precision over all {} tasks: {:.4f}'.format(
        args.tasks, average_precs))

    # -with exemplars
    if args.use_exemplars:
        precs = [
            evaluate.validate(
                model,
                test_datasets[i],
                verbose=False,
                test_size=None,
                task=i + 1,
                with_exemplars=True,
                allowed_classes=list(
                    range(classes_per_task * i, classes_per_task *
                          (i + 1))) if scenario == "task" else None)
            for i in range(args.tasks)
        ]
        print("\n Precision on test-set (classification using exemplars):")
        for i in range(args.tasks):
            print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
        average_precs_ex = sum(precs) / args.tasks
        print('=> average precision over all {} tasks: {:.4f}'.format(
            args.tasks, average_precs_ex))
    print("\n")

    #-------------------------------------------------------------------------------------------------#

    #------------------#
    #----- OUTPUT -----#
    #------------------#

    # Average precision on full test set
    with open("{}/prec-{}.txt".format(args.r_dir, param_stamp), 'w') as output_file:
        output_file.write('{}\n'.format(
            average_precs_ex if args.use_exemplars else average_precs))
    # -precision-dict
    file_name = "{}/dict-{}".format(args.r_dir, param_stamp)
    utils.save_object(
        precision_dict_exemplars if args.use_exemplars else precision_dict,
        file_name)

    # Average precision on full test set not evaluated using exemplars (i.e., using softmax on final layer)
    if args.use_exemplars:
        with open("{}/prec_noex-{}.txt".format(args.r_dir, param_stamp),
                  'w') as output_file:
            output_file.write('{}\n'.format(average_precs))
        # -precision-dict:
        file_name = "{}/dict_noex-{}".format(args.r_dir, param_stamp)
        utils.save_object(precision_dict, file_name)

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- PLOTTING -----#
    #--------------------#

    # If requested, generate pdf
    if args.pdf:
        # -open pdf
        pp = visual_plt.open_pdf("{}/{}.pdf".format(args.p_dir, param_stamp))

        # -show samples and reconstructions (either from main model or from separate generator)
        if args.feedback or args.replay == "generative":
            evaluate.show_samples(model if args.feedback else generator,
                                  config,
                                  size=args.sample_n,
                                  pdf=pp)
            for i in range(args.tasks):
                evaluate.show_reconstruction(
                    model if args.feedback else generator,
                    test_datasets[i],
                    config,
                    pdf=pp,
                    task=i + 1)

        # -show metrics reflecting progression during training
        figure_list = []  #-> create list to store all figures to be plotted
        # -generate all figures (and store them in [figure_list])
        figure = visual_plt.plot_lines(
            precision_dict["all_tasks"],
            x_axes=precision_dict["x_task"],
            line_names=['task {}'.format(i + 1) for i in range(args.tasks)])
        figure_list.append(figure)
        figure = visual_plt.plot_lines([precision_dict["average"]],
                                       x_axes=precision_dict["x_task"],
                                       line_names=['average all tasks so far'])
        figure_list.append(figure)
        if args.use_exemplars:
            figure = visual_plt.plot_lines(
                precision_dict_exemplars["all_tasks"],
                x_axes=precision_dict_exemplars["x_task"],
                line_names=[
                    'task {}'.format(i + 1) for i in range(args.tasks)
                ])
            figure_list.append(figure)
        # -add figures to pdf
        for figure in figure_list:
            pp.savefig(figure)

        # -close pdf
        pp.close()
Example #2
def run(args):

    # Set default arguments
    args.g_fc_lay = args.fc_lay if args.g_fc_lay is None else args.g_fc_lay
    args.g_fc_uni = args.fc_units if args.g_fc_uni is None else args.g_fc_uni
    args.g_iters = args.iters if args.g_iters is None else args.g_iters
    # -if [log_per_task], reset all logs
    if args.log_per_task:
        args.prec_log = args.iters
        args.loss_log = args.iters
        args.sample_log = args.iters
    # -if XdG is selected but not the incremental task learning scenario, give error
    if (not args.scenario == "task") and args.gating_prop > 0:
        raise ValueError(
            "'XdG' only works for the incremental task learning scenario.")
    # -if EWC, SI or XdG is selected together with 'feedback', give error
    if args.feedback and (args.ewc or args.si or args.gating_prop > 0):
        raise NotImplementedError(
            "EWC, SI and XdG are not supported with feedback connections.")
    # -if XdG is selected together with replay of any kind, give error
    if args.gating_prop > 0 and (not args.replay == "none"):
        raise NotImplementedError(
            "XdG is not supported with '{}' replay.".format(args.replay))
    # -create plots- and results-directories if needed
    if not os.path.isdir(args.r_dir):
        os.mkdir(args.r_dir)
    if args.pdf and not os.path.isdir(args.p_dir):
        os.mkdir(args.p_dir)

    # Use cuda?
    cuda = torch.cuda.is_available() and args.cuda
    device = torch.device("cuda" if cuda else "cpu")

    # Set random seeds
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)

    #-------------------------------------------------------------------------------------------------#

    #----------------#
    #----- DATA -----#
    #----------------#

    # Prepare data for chosen experiment
    (train_datasets,
     test_datasets), config, classes_per_task = get_multitask_experiment(
         name=args.experiment,
         scenario=args.scenario,
         tasks=args.tasks,
         data_dir=args.d_dir,
         verbose=True,
         exception=True if args.seed == 0 else False,
     )

    #-------------------------------------------------------------------------------------------------#

    #------------------------------#
    #----- MODEL (CLASSIFIER) -----#
    #------------------------------#

    # Define main model (i.e., classifier; if requested, with feedback connections)
    if args.feedback:
        model = AutoEncoder(
            image_size=config['size'],
            image_channels=config['channels'],
            classes=config['classes'],
            fc_layers=args.fc_lay,
            fc_units=args.fc_units,
            z_dim=args.z_dim,
            fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False,
            fc_nl=args.fc_nl,
        ).to(device)
        model.lamda_pl = 1.  #--> to ensure that this VAE is also trained to classify
    else:
        model = Classifier(
            image_size=config['size'],
            image_channels=config['channels'],
            classes=config['classes'],
            fc_layers=args.fc_lay,
            fc_units=args.fc_units,
            fc_drop=args.fc_drop,
            fc_nl=args.fc_nl,
            fc_bn=True if args.fc_bn == "yes" else False,
            excit_buffer=True if args.gating_prop > 0 else False,
        ).to(device)

    # Define optimizer (only include parameters that require gradients)
    model.optim_list = [{
        'params':
        filter(lambda p: p.requires_grad, model.parameters()),
        'lr':
        args.lr
    }]
    model.optim_type = args.optimizer
    if model.optim_type in ("adam", "adam_reset"):
        model.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999))
    elif model.optim_type == "sgd":
        model.optimizer = optim.SGD(model.optim_list)
    else:
        raise ValueError(
            "Unrecognized optimizer, '{}' is not currently a valid option".
            format(args.optimizer))

    # Set loss-function for reconstruction
    if args.feedback:
        model.recon_criterion = nn.BCELoss(reduction='mean')  # ('reduction' replaces the deprecated 'size_average' argument)

    #-------------------------------------------------------------------------------------------------#

    #-----------------------------------#
    #----- CL-STRATEGY: ALLOCATION -----#
    #-----------------------------------#

    # Elastic Weight Consolidation (EWC)
    if isinstance(model, ContinualLearner):
        model.ewc_lambda = args.ewc_lambda if args.ewc else 0
        model.fisher_n = args.fisher_n
        model.gamma = args.gamma
        model.online = args.online
        model.emp_FI = args.emp_fi

    # Synaptic Intelligence (SI)
    if isinstance(model, ContinualLearner):
        model.si_c = args.si_c if args.si else 0
        model.epsilon = args.epsilon

    # XdG: create for every task a "mask" for each hidden fully connected layer
    if isinstance(model, ContinualLearner) and args.gating_prop > 0:
        mask_dict = {}
        excit_buffer_list = []
        for task_id in range(args.tasks):
            mask_dict[task_id + 1] = {}
            for i in range(model.fcE.layers):
                layer = getattr(model.fcE, "fcLayer{}".format(i + 1)).linear
                if task_id == 0:
                    excit_buffer_list.append(layer.excit_buffer)
                n_units = len(layer.excit_buffer)
                gated_units = np.random.choice(n_units,
                                               size=int(args.gating_prop *
                                                        n_units),
                                               replace=False)
                mask_dict[task_id + 1][i] = gated_units
        model.mask_dict = mask_dict
        model.excit_buffer_list = excit_buffer_list

    #-------------------------------------------------------------------------------------------------#

    #-------------------------------#
    #----- CL-STRATEGY: REPLAY -----#
    #-------------------------------#

    # Use distillation loss (i.e., soft targets) for replayed data? (and set temperature)
    model.replay_targets = "soft" if args.distill else "hard"
    model.KD_temp = args.temp

    # If needed, specify separate model for the generator
    train_gen = (args.replay == "generative" and not args.feedback)
    if train_gen:
        # -specify architecture
        generator = AutoEncoder(
            image_size=config['size'],
            image_channels=config['channels'],
            fc_layers=args.g_fc_lay,
            fc_units=args.g_fc_uni,
            z_dim=args.z_dim,
            classes=config['classes'],
            fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False,
            fc_nl=args.fc_nl,
        ).to(device)
        # -set optimizer(s)
        generator.optim_list = [{
            'params':
            filter(lambda p: p.requires_grad, generator.parameters()),
            'lr':
            args.lr
        }]
        generator.optim_type = args.optimizer
        if generator.optim_type in ("adam", "adam_reset"):
            generator.optimizer = optim.Adam(generator.optim_list,
                                             betas=(0.9, 0.999))
        elif generator.optim_type == "sgd":
            generator.optimizer = optim.SGD(generator.optim_list)
        # -set reconstruction criterion
        generator.recon_criterion = nn.BCELoss(reduction='mean')  # ('reduction' replaces the deprecated 'size_average' argument)
    else:
        generator = None

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- REPORTING -----#
    #---------------------#

    # Get parameter-stamp (and print on screen)
    param_stamp = utils.get_param_stamp(
        args,
        model.name,
        verbose=True,
        replay=True if (not args.replay == "none") else False,
        replay_model_name=generator.name if
        (args.replay == "generative" and not args.feedback) else None,
    )

    # Print some model-characteristics on the screen
    # -main model
    print("\n")
    utils.print_model_info(model, title="MAIN MODEL")
    # -generator
    if generator is not None:
        utils.print_model_info(generator, title="GENERATOR")

    # Prepare for plotting
    # -open pdf
    pp = visual_plt.open_pdf("{}/{}.pdf".format(
        args.p_dir, param_stamp)) if args.pdf else None
    # -define [precision_dict] to keep track of performance during training for later plotting
    precision_dict = evaluate.initiate_precision_dict(args.tasks)
    # -visdom-settings
    if args.visdom:
        env_name = "{exp}{tasks}-{scenario}".format(exp=args.experiment,
                                                    tasks=args.tasks,
                                                    scenario=args.scenario)
        graph_name = "{fb}{mode}{syn}{ewc}{XdG}".format(
            fb="1M-" if args.feedback else "",
            mode=args.replay,
            syn="-si{}".format(args.si_c) if args.si else "",
            ewc="-ewc{}{}".format(
                args.ewc_lambda, "-O{}".format(args.gamma)
                if args.online else "") if args.ewc else "",
            XdG=""
            if args.gating_prop == 0 else "-XdG{}".format(args.gating_prop))
        visdom = {'env': env_name, 'graph': graph_name}
    else:
        visdom = None

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- CALLBACKS -----#
    #---------------------#

    # Callbacks for reporting on and visualizing loss
    generator_loss_cbs = [
        cb._VAE_loss_cb(log=args.loss_log,
                        visdom=visdom,
                        model=model if args.feedback else generator,
                        tasks=args.tasks,
                        iters_per_task=args.g_iters,
                        replay=False if args.replay == "none" else True)
    ] if (train_gen or args.feedback) else [None]
    solver_loss_cbs = [
        cb._solver_loss_cb(log=args.loss_log,
                           visdom=visdom,
                           model=model,
                           tasks=args.tasks,
                           iters_per_task=args.iters,
                           replay=False if args.replay == "none" else True)
    ] if (not args.feedback) else [None]

    # Callbacks for evaluating and plotting generated / reconstructed samples
    sample_cbs = [
        cb._sample_cb(log=args.sample_log,
                      visdom=visdom,
                      config=config,
                      test_datasets=test_datasets,
                      sample_size=args.sample_n,
                      iters_per_task=args.g_iters)
    ] if (train_gen or args.feedback) else [None]

    # Callbacks for reporting and visualizing accuracy
    # -visdom (i.e., after each [prec_log])
    eval_cb = cb._eval_cb(
        log=args.prec_log,
        test_datasets=test_datasets,
        visdom=visdom,
        iters_per_task=args.iters,
        scenario=args.scenario,
        collate_fn=utils.label_squeezing_collate_fn,
        test_size=args.prec_n,
        classes_per_task=classes_per_task,
        task_mask=True if isinstance(model, ContinualLearner) and
        (args.gating_prop > 0) else False)
    # -pdf: for summary plots (i.e., only after each task)
    eval_cb_full = cb._eval_cb(
        log=args.iters,
        test_datasets=test_datasets,
        precision_dict=precision_dict,
        scenario=args.scenario,
        collate_fn=utils.label_squeezing_collate_fn,
        iters_per_task=args.iters,
        classes_per_task=classes_per_task,
        task_mask=True if isinstance(model, ContinualLearner) and
        (args.gating_prop > 0) else False)
    # -collect them in <lists>
    eval_cbs = [eval_cb, eval_cb_full]

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- TRAINING -----#
    #--------------------#

    print("--> Training:")
    # Keep track of training-time
    start = time.time()
    # Train model
    train_cl(
        model,
        train_datasets,
        replay_mode=args.replay,
        scenario=args.scenario,
        classes_per_task=classes_per_task,
        iters=args.iters,
        batch_size=args.batch,
        collate_fn=utils.label_squeezing_collate_fn,
        visualize=True if args.visdom else False,
        generator=generator,
        gen_iters=args.g_iters,
        gen_loss_cbs=generator_loss_cbs,
        sample_cbs=sample_cbs,
        eval_cbs=eval_cbs,
        loss_cbs=generator_loss_cbs if args.feedback else solver_loss_cbs,
    )
    # Get total training-time in seconds, and write to file
    training_time = time.time() - start
    with open("{}/time-{}.txt".format(args.r_dir, param_stamp), 'w') as time_file:
        time_file.write('{}\n'.format(training_time))

    #-------------------------------------------------------------------------------------------------#

    #----------------------#
    #----- EVALUATION -----#
    #----------------------#

    print('\n\n--> Evaluation ("incremental {} learning scenario"):'.format(
        args.scenario))

    # Generation (plot in pdf)
    if (pp is not None) and train_gen:
        evaluate.show_samples(generator, config, size=args.sample_n, pdf=pp)
    if (pp is not None) and args.feedback:
        evaluate.show_samples(model, config, size=args.sample_n, pdf=pp)

    # Reconstruction (plot in pdf)
    if (pp is not None) and (train_gen or args.feedback):
        for i in range(args.tasks):
            if args.feedback:
                evaluate.show_reconstruction(model,
                                             test_datasets[i],
                                             config,
                                             pdf=pp,
                                             task=i + 1)
            else:
                evaluate.show_reconstruction(generator,
                                             test_datasets[i],
                                             config,
                                             pdf=pp,
                                             task=i + 1)

    # Classifier (print on screen & write to file)
    if args.scenario == "task":
        precs = [
            evaluate.validate(
                model,
                test_datasets[i],
                verbose=False,
                test_size=None,
                task_mask=True if isinstance(model, ContinualLearner)
                and args.gating_prop > 0 else False,
                task=i + 1,
                allowed_classes=list(
                    range(classes_per_task * i, classes_per_task * (i + 1))))
            for i in range(args.tasks)
        ]
    else:
        precs = [
            evaluate.validate(model,
                              test_datasets[i],
                              verbose=False,
                              test_size=None,
                              task=i + 1) for i in range(args.tasks)
        ]
    print("\n Precision on test-set:")
    for i in range(args.tasks):
        print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
    average_precs = sum(precs) / args.tasks
    print('=> average precision over all {} tasks: {:.4f}\n'.format(
        args.tasks, average_precs))

    #-------------------------------------------------------------------------------------------------#

    #------------------#
    #----- OUTPUT -----#
    #------------------#

    # Average precision on full test set (no restrictions on which nodes can be predicted: "incremental" / "singlehead")
    with open("{}/prec-{}.txt".format(args.r_dir, param_stamp), 'w') as output_file:
        output_file.write('{}\n'.format(average_precs))

    # Precision-dictionary
    file_name = "{}/dict-{}".format(args.r_dir, param_stamp)
    utils.save_object(precision_dict, file_name)

    #-------------------------------------------------------------------------------------------------#

    #--------------------#
    #----- PLOTTING -----#
    #--------------------#

    # If requested, generate pdf
    if pp is not None:
        # -create list to store all figures to be plotted.
        figure_list = []
        # -generate all figures (and store them in [figure_list])
        figure = visual_plt.plot_lines(
            precision_dict["all_tasks"],
            x_axes=precision_dict["x_task"],
            line_names=['task {}'.format(i + 1) for i in range(args.tasks)])
        figure_list.append(figure)
        figure = visual_plt.plot_lines([precision_dict["average"]],
                                       x_axes=precision_dict["x_task"],
                                       line_names=['average all tasks so far'])
        figure_list.append(figure)
        # -add figures to pdf (and close this pdf).
        for figure in figure_list:
            pp.savefig(figure)
        # -close pdf
        pp.close()
Example #3
def run(args, verbose=False):
    # Set default arguments & check for incompatible options
    args.lr_gen = args.lr if args.lr_gen is None else args.lr_gen
    args.g_iters = args.iters if args.g_iters is None else args.g_iters
    args.g_fc_lay = args.fc_lay if args.g_fc_lay is None else args.g_fc_lay
    args.g_fc_uni = args.fc_units if args.g_fc_uni is None else args.g_fc_uni
    # -if [log_per_task], reset all logs
    if args.log_per_task:
        args.prec_log = args.iters
        args.loss_log = args.iters
        args.sample_log = args.iters
    # -if [iCaRL] is selected, select all accompanying options
    if hasattr(args, "icarl") and args.icarl:
        args.use_exemplars = True
        args.add_exemplars = True
        args.bce = True
        args.bce_distill = True
    # -if XdG is selected but not the Task-IL scenario, give error
    if (not args.scenario == "task") and args.xdg:
        raise ValueError("'XdG' is only compatible with the Task-IL scenario.")
    # -if EWC, SI, XdG, A-GEM or iCaRL is selected together with 'feedback', give error
    if args.feedback and (args.ewc or args.si or args.xdg or args.icarl
                          or args.agem):
        raise NotImplementedError(
            "EWC, SI, XdG, A-GEM and iCaRL are not supported with feedback connections."
        )
    # -if A-GEM is selected without any replay, give warning
    if args.agem and args.replay == "none":
        # (use warnings.warn rather than raising Warning, which would abort execution)
        import warnings
        warnings.warn(
            "The '--agem' flag is selected, but without any type of replay. "
            "For the original A-GEM method, also select --replay='exemplars'.")
    # -if EWC, SI, XdG, A-GEM or iCaRL is selected together with offline-replay, give error
    if args.replay == "offline" and (args.ewc or args.si or args.xdg
                                     or args.icarl or args.agem):
        raise NotImplementedError(
            "Offline replay cannot be combined with EWC, SI, XdG, A-GEM or iCaRL."
        )
    # -if binary classification loss is selected together with 'feedback', give error
    if args.feedback and args.bce:
        raise NotImplementedError(
            "Binary classification loss not supported with feedback connections."
        )
    # -if XdG is selected together with both replay and EWC, give error (either one of them alone with XdG is fine)
    if (args.xdg and args.gating_prop > 0) and (
            not args.replay == "none") and (args.ewc or args.si):
        raise NotImplementedError(
            "XdG is not supported with both '{}' replay and EWC / SI.".format(
                args.replay))
        # --> problem is that applying different task-masks interferes with gradient calculation
        #    (should be possible to overcome by calculating backward step on EWC/SI-loss also for each mask separately)
    # -if 'BCEdistill' is selected for other than scenario=="class", give error
    if args.bce_distill and not args.scenario == "class":
        raise ValueError(
            "BCE-distill can only be used for class-incremental learning.")
    # -create plots- and results-directories if needed
    if not os.path.isdir(args.r_dir):
        os.mkdir(args.r_dir)
    if args.pdf and not os.path.isdir(args.p_dir):
        os.mkdir(args.p_dir)

    scenario = args.scenario
    # If the Task-IL scenario is chosen with a single-headed output layer, the effective scenario becomes "domain"
    # (but note that when XdG is used, task-identity information is still used, so the actual scenario remains Task-IL)
    if args.singlehead and args.scenario == "task":
        scenario = "domain"

    # If only want param-stamp, get it printed to screen and exit
    if hasattr(args, "get_stamp") and args.get_stamp:
        print(get_param_stamp_from_args(args=args))
        exit()

    # Use cuda?
    cuda = torch.cuda.is_available() and args.cuda
    device = torch.device("cuda" if cuda else "cpu")
    if verbose:
        print("CUDA is {}used".format("" if cuda else "NOT(!!) "))

    # Set random seeds
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)

    # -------------------------------------------------------------------------------------------------#

    # ----------------#
    # ----- DATA -----#
    # ----------------#

    # Prepare data for chosen experiment
    if verbose:
        print("\nPreparing the data...")
    (train_datasets,
     test_datasets), config, classes_per_task = get_multitask_experiment(
         name=args.experiment,
         scenario=scenario,
         tasks=args.tasks,
         data_dir=args.d_dir,
         verbose=verbose,
         exception=True if args.seed == 0 else False,
     )

    # -------------------------------------------------------------------------------------------------#

    # ------------------------------#
    # ----- MODEL (CLASSIFIER) -----#
    # ------------------------------#

    # Define main model (i.e., classifier; if requested, with feedback connections)
    if args.feedback:
        model = AutoEncoder(
            image_size=config['size'],
            image_channels=config['channels'],
            classes=config['classes'],
            fc_layers=args.fc_lay,
            fc_units=args.fc_units,
            z_dim=args.z_dim,
            fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False,
            fc_nl=args.fc_nl,
        ).to(device)
        model.lamda_pl = 1.  # --> to ensure that this VAE is also trained to classify
    else:
        model = Classifier(
            image_size=config['size'],
            image_channels=config['channels'],
            classes=config['classes'],
            fc_layers=args.fc_lay,
            fc_units=args.fc_units,
            fc_drop=args.fc_drop,
            fc_nl=args.fc_nl,
            fc_bn=True if args.fc_bn == "yes" else False,
            excit_buffer=True if args.xdg and args.gating_prop > 0 else False,
            binaryCE=args.bce,
            binaryCE_distill=args.bce_distill,
            AGEM=args.agem,
        ).to(device)

    # Define optimizer (only include parameters that require gradients)
    model.optim_list = [{
        'params':
        filter(lambda p: p.requires_grad, model.parameters()),
        'lr':
        args.lr
    }]
    model.optim_type = args.optimizer
    if model.optim_type in ("adam", "adam_reset"):
        model.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999))
    elif model.optim_type == "sgd":
        model.optimizer = optim.SGD(model.optim_list)
    else:
        raise ValueError(
            "Unrecognized optimizer, '{}' is not currently a valid option".
            format(args.optimizer))

    # -------------------------------------------------------------------------------------------------#

    # ----------------------------------#
    # ----- CL-STRATEGY: EXEMPLARS -----#
    # ----------------------------------#

    # Store in the model whether, how many, and in what way to store exemplars
    if isinstance(model, ExemplarHandler) and (args.use_exemplars
                                               or args.add_exemplars
                                               or args.replay == "exemplars"):
        model.memory_budget = args.budget
        model.norm_exemplars = args.norm_exemplars
        model.herding = args.herding

    # -------------------------------------------------------------------------------------------------#

    # -----------------------------------#
    # ----- CL-STRATEGY: ALLOCATION -----#
    # -----------------------------------#

    # Elastic Weight Consolidation (EWC)
    if isinstance(model, ContinualLearner):
        model.ewc_lambda = args.ewc_lambda if args.ewc else 0
        if args.ewc:
            model.fisher_n = args.fisher_n
            model.gamma = args.gamma
            model.online = args.online
            model.emp_FI = args.emp_fi

    # Synaptic Intelligence (SI)
    if isinstance(model, ContinualLearner):
        model.si_c = args.si_c if args.si else 0
        if args.si:
            model.epsilon = args.epsilon

    # XdG: create for every task a "mask" for each hidden fully connected layer
    if isinstance(model, ContinualLearner) and (args.xdg
                                                and args.gating_prop > 0):
        mask_dict = {}
        excit_buffer_list = []
        for task_id in range(args.tasks):
            mask_dict[task_id + 1] = {}
            for i in range(model.fcE.layers):
                layer = getattr(model.fcE, "fcLayer{}".format(i + 1)).linear
                if task_id == 0:
                    excit_buffer_list.append(layer.excit_buffer)
                n_units = len(layer.excit_buffer)
                gated_units = np.random.choice(n_units,
                                               size=int(args.gating_prop *
                                                        n_units),
                                               replace=False)
                mask_dict[task_id + 1][i] = gated_units
        model.mask_dict = mask_dict
        model.excit_buffer_list = excit_buffer_list

    # -------------------------------------------------------------------------------------------------#

    # -------------------------------#
    # ----- CL-STRATEGY: REPLAY -----#
    # -------------------------------#

    # Use distillation loss (i.e., soft targets) for replayed data? (and set temperature)
    if isinstance(model, Replayer):
        model.replay_targets = "soft" if args.distill else "hard"
        model.KD_temp = args.temp

    # If needed, specify separate model for the generator
    train_gen = (args.replay == "generative" and not args.feedback)
    if train_gen:
        # -specify architecture
        generator = AutoEncoder(
            image_size=config['size'],
            image_channels=config['channels'],
            fc_layers=args.g_fc_lay,
            fc_units=args.g_fc_uni,
            z_dim=args.g_z_dim,
            classes=config['classes'],
            fc_drop=args.fc_drop,
            fc_bn=True if args.fc_bn == "yes" else False,
            fc_nl=args.fc_nl,
        ).to(device)
        # -set optimizer(s)
        generator.optim_list = [{
            'params':
            filter(lambda p: p.requires_grad, generator.parameters()),
            'lr':
            args.lr_gen
        }]
        generator.optim_type = args.optimizer
        if generator.optim_type in ("adam", "adam_reset"):
            generator.optimizer = optim.Adam(generator.optim_list,
                                             betas=(0.9, 0.999))
        elif generator.optim_type == "sgd":
            generator.optimizer = optim.SGD(generator.optim_list)
    else:
        generator = None

    # -------------------------------------------------------------------------------------------------#

    # ---------------------#
    # ----- REPORTING -----#
    # ---------------------#

    # Get parameter-stamp (and print on screen)
    if verbose:
        print("\nParameter-stamp...")
    param_stamp = get_param_stamp(
        args,
        model.name,
        verbose=verbose,
        replay=True if (not args.replay == "none") else False,
        replay_model_name=generator.name if
        (args.replay == "generative" and not args.feedback) else None,
    )

    # Print some model-characteristics on the screen
    if verbose:
        # -main model
        utils.print_model_info(model, title="MAIN MODEL")
        # -generator
        if generator is not None:
            utils.print_model_info(generator, title="GENERATOR")

    # Prepare for keeping track of statistics required for metrics (also used for plotting in pdf)
    if args.pdf or args.metrics:
        # -define [metrics_dict] to keep track of performance during training for storing & for later plotting in pdf
        metrics_dict = evaluate.initiate_metrics_dict(n_tasks=args.tasks,
                                                      scenario=args.scenario)
        # -evaluate randomly initialized model on all tasks & store accuracies in [metrics_dict] (for calculating metrics)
        if not args.use_exemplars:
            metrics_dict = evaluate.intial_accuracy(
                model,
                test_datasets,
                metrics_dict,
                classes_per_task=classes_per_task,
                scenario=scenario,
                test_size=None,
                no_task_mask=False)
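            # (these pre-training accuracies serve as the baseline for the forward-
            #  transfer (FWT) computation in the EVALUATION section below)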
    else:
        metrics_dict = None

    # Prepare for plotting in visdom
    # -visdom-settings
    if args.visdom:
        env_name = "{exp}{tasks}-{scenario}".format(exp=args.experiment,
                                                    tasks=args.tasks,
                                                    scenario=args.scenario)
        graph_name = "{fb}{replay}{syn}{ewc}{xdg}{icarl}{bud}".format(
            fb="1M-" if args.feedback else "",
            replay="{}{}{}".format(args.replay, "D" if args.distill else "",
                                   "-aGEM" if args.agem else ""),
            syn="-si{}".format(args.si_c) if args.si else "",
            ewc="-ewc{}{}".format(
                args.ewc_lambda, "-O{}".format(args.gamma)
                if args.online else "") if args.ewc else "",
            xdg="" if (not args.xdg) or args.gating_prop == 0 else
            "-XdG{}".format(args.gating_prop),
            icarl="-iCaRL" if (args.use_exemplars and args.add_exemplars
                               and args.bce and args.bce_distill) else "",
            bud="-bud{}".format(args.budget) if
            (args.use_exemplars or args.add_exemplars
             or args.replay == "exemplars") else "",
        )
        visdom = {'env': env_name, 'graph': graph_name}
    else:
        visdom = None

    # -------------------------------------------------------------------------------------------------#

    # ---------------------#
    # ----- CALLBACKS -----#
    # ---------------------#

    # Callbacks for reporting on and visualizing loss
    generator_loss_cbs = [
        cb._VAE_loss_cb(
            log=args.loss_log,
            visdom=visdom,
            model=model if args.feedback else generator,
            tasks=args.tasks,
            iters_per_task=args.iters if args.feedback else args.g_iters,
            replay=False if args.replay == "none" else True)
    ] if (train_gen or args.feedback) else [None]
    solver_loss_cbs = [
        cb._solver_loss_cb(log=args.loss_log,
                           visdom=visdom,
                           model=model,
                           tasks=args.tasks,
                           iters_per_task=args.iters,
                           replay=False if args.replay == "none" else True)
    ] if (not args.feedback) else [None]

    # Callbacks for evaluating and plotting generated / reconstructed samples
    sample_cbs = [
        cb._sample_cb(
            log=args.sample_log,
            visdom=visdom,
            config=config,
            test_datasets=test_datasets,
            sample_size=args.sample_n,
            iters_per_task=args.iters if args.feedback else args.g_iters)
    ] if (train_gen or args.feedback) else [None]

    # Callbacks for reporting and visualizing accuracy
    # -visdom (i.e., after each [prec_log])
    eval_cbs = [
        cb._eval_cb(log=args.prec_log,
                    test_datasets=test_datasets,
                    visdom=visdom,
                    iters_per_task=args.iters,
                    test_size=args.prec_n,
                    classes_per_task=classes_per_task,
                    scenario=scenario,
                    with_exemplars=False)
    ] if (not args.use_exemplars) else [None]
    # --> during training on a task, evaluation cannot be with exemplars as those are only selected after training
    #    (instead, evaluation for visdom is only done after each task, by including callback-function into [metric_cbs])

    # Callbacks for calculating statistics required for metrics
    # -pdf / reporting: summary plots (i.e., only after each task) (when using exemplars, also for visdom)
    metric_cbs = [
        cb._metric_cb(log=args.iters,
                      test_datasets=test_datasets,
                      classes_per_task=classes_per_task,
                      metrics_dict=metrics_dict,
                      scenario=scenario,
                      iters_per_task=args.iters,
                      with_exemplars=args.use_exemplars),
        cb._eval_cb(log=args.iters,
                    test_datasets=test_datasets,
                    visdom=visdom,
                    iters_per_task=args.iters,
                    test_size=args.prec_n,
                    classes_per_task=classes_per_task,
                    scenario=scenario,
                    with_exemplars=True) if args.use_exemplars else None
    ]

    # -------------------------------------------------------------------------------------------------#

    # --------------------#
    # ----- TRAINING -----#
    # --------------------#

    if verbose:
        print("\nTraining...")
    # Keep track of training-time
    start = time.time()
    # Train model
    train_cl(
        model,
        train_datasets,
        replay_mode=args.replay,
        scenario=scenario,
        classes_per_task=classes_per_task,
        iters=args.iters,
        batch_size=args.batch,
        generator=generator,
        gen_iters=args.g_iters,
        gen_loss_cbs=generator_loss_cbs,
        sample_cbs=sample_cbs,
        eval_cbs=eval_cbs,
        loss_cbs=generator_loss_cbs if args.feedback else solver_loss_cbs,
        metric_cbs=metric_cbs,
        use_exemplars=args.use_exemplars,
        add_exemplars=args.add_exemplars,
        param_stamp=param_stamp,
    )
    # Get total training-time in seconds, and write to file
    if args.time:
        training_time = time.time() - start
        with open("{}/time-{}.txt".format(args.r_dir, param_stamp),
                  'w') as time_file:
            time_file.write('{}\n'.format(training_time))

    # -------------------------------------------------------------------------------------------------#

    # ----------------------#
    # ----- EVALUATION -----#
    # ----------------------#

    if verbose:
        print("\n\nEVALUATION RESULTS:")

    # Evaluate precision of final model on full test-set
    precs = [
        evaluate.validate(
            model,
            test_datasets[i],
            verbose=False,
            test_size=None,
            task=i + 1,
            with_exemplars=False,
            allowed_classes=list(
                range(classes_per_task * i, classes_per_task *
                      (i + 1))) if scenario == "task" else None)
        for i in range(args.tasks)
    ]
    average_precs = sum(precs) / args.tasks
    # -print on screen
    if verbose:
        print("\n Precision on test-set{}:".format(
            " (softmax classification)" if args.use_exemplars else ""))
        for i in range(args.tasks):
            print(" - Task {} [{}-{}]: {:.4f}".format(
                i + 1, classes_per_task * i,
                classes_per_task * (i + 1) - 1, precs[i]))
        print('=> Average precision over all {} tasks: {:.4f}\n'.format(
            args.tasks, average_precs))

    # -with exemplars
    if args.use_exemplars:
        precs = [
            evaluate.validate(
                model,
                test_datasets[i],
                verbose=False,
                test_size=None,
                task=i + 1,
                with_exemplars=True,
                allowed_classes=list(
                    range(classes_per_task * i, classes_per_task *
                          (i + 1))) if scenario == "task" else None)
            for i in range(args.tasks)
        ]
        average_precs_ex = sum(precs) / args.tasks
        # -print on screen
        if verbose:
            print(" Precision on test-set (classification using exemplars):")
            for i in range(args.tasks):
                print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
            print('=> Average precision over all {} tasks: {:.4f}\n'.format(
                args.tasks, average_precs_ex))

    if args.metrics:
        # Accuracy matrix
        if args.scenario in ('task', 'domain'):
            R = pd.DataFrame(data=metrics_dict['acc per task'],
                             index=[
                                 'after task {}'.format(i + 1)
                                 for i in range(args.tasks)
                             ])
            R.loc['at start'] = metrics_dict['initial acc per task'] if (
                not args.use_exemplars) else ['NA' for _ in range(args.tasks)]
            R = R.reindex(
                ['at start'] +
                ['after task {}'.format(i + 1) for i in range(args.tasks)])
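            # Backward transfer (BWT): accuracy on task i after training on the final
            # task, minus accuracy on task i right after training on task i itself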
            BWTs = [(R.loc['after task {}'.format(args.tasks), 'task {}'.format(i + 1)] - \
                     R.loc['after task {}'.format(i + 1), 'task {}'.format(i + 1)]) for i in range(args.tasks - 1)]
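            # Forward transfer (FWT): accuracy on task i+1 right after training on
            # task i, minus its accuracy at the start (undefined when using exemplars)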
            FWTs = [
                0. if args.use_exemplars else
                (R.loc['after task {}'.format(i + 1),
                       'task {}'.format(i + 2)] -
                 R.loc['at start', 'task {}'.format(i + 2)])
                for i in range(args.tasks - 1)
            ]
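            # Forgetting (F): highest accuracy ever reached on task i, minus its
            # accuracy after training on the final task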
            forgetting = []
            for i in range(args.tasks - 1):
                forgetting.append(
                    max(R.iloc[1:args.tasks, i]) - R.iloc[args.tasks, i])
            R.loc['FWT (per task)'] = ['NA'] + FWTs
            R.loc['BWT (per task)'] = BWTs + ['NA']
            R.loc['F (per task)'] = forgetting + ['NA']
            BWT = sum(BWTs) / (args.tasks - 1)
            F = sum(forgetting) / (args.tasks - 1)
            FWT = sum(FWTs) / (args.tasks - 1)
            metrics_dict['BWT'] = BWT
            metrics_dict['F'] = F
            metrics_dict['FWT'] = FWT
            # -print on screen
            if verbose:
                print("Accuracy matrix")
                print(R)
                print("\nFWT = {:.4f}".format(FWT))
                print("BWT = {:.4f}".format(BWT))
                print("  F = {:.4f}\n\n".format(F))
        else:
            if verbose:
                # Accuracy matrix based only on classes in that task (i.e., evaluation as if Task-IL scenario)
                R = pd.DataFrame(
                    data=metrics_dict['acc per task (only classes in task)'],
                    index=[
                        'after task {}'.format(i + 1)
                        for i in range(args.tasks)
                    ])
                R.loc['at start'] = metrics_dict[
                    'initial acc per task (only classes in task)'] if not args.use_exemplars else [
                        'NA' for _ in range(args.tasks)
                    ]
                R = R.reindex(
                    ['at start'] +
                    ['after task {}'.format(i + 1) for i in range(args.tasks)])
                print(
                    "Accuracy matrix, based on only classes in that task ('as if Task-IL scenario')"
                )
                print(R)

                # Accuracy matrix, always based on all classes
                R = pd.DataFrame(
                    data=metrics_dict['acc per task (all classes)'],
                    index=[
                        'after task {}'.format(i + 1)
                        for i in range(args.tasks)
                    ])
                R.loc['at start'] = metrics_dict[
                    'initial acc per task (only classes in task)'] if not args.use_exemplars else [
                        'NA' for _ in range(args.tasks)
                    ]
                R = R.reindex(
                    ['at start'] +
                    ['after task {}'.format(i + 1) for i in range(args.tasks)])
                print("\nAccuracy matrix, always based on all classes")
                print(R)

                # Accuracy matrix, based on all classes thus far
                R = pd.DataFrame(data=metrics_dict[
                    'acc per task (all classes up to trained task)'],
                                 index=[
                                     'after task {}'.format(i + 1)
                                     for i in range(args.tasks)
                                 ])
                print(
                    "\nAccuracy matrix, based on all classes up to the trained task"
                )
                print(R)

            # Accuracy matrix, based on all classes up to the task being evaluated
            # (this is the accuracy-matrix used for calculating the metrics in the Class-IL scenario)
            R = pd.DataFrame(data=metrics_dict[
                'acc per task (all classes up to evaluated task)'],
                             index=[
                                 'after task {}'.format(i + 1)
                                 for i in range(args.tasks)
                             ])
            R.loc['at start'] = metrics_dict[
                'initial acc per task (only classes in task)'] if not args.use_exemplars else [
                    'NA' for _ in range(args.tasks)
                ]
            R = R.reindex(
                ['at start'] +
                ['after task {}'.format(i + 1) for i in range(args.tasks)])
            BWTs = [(R.loc['after task {}'.format(args.tasks), 'task {}'.format(i + 1)] - \
                     R.loc['after task {}'.format(i + 1), 'task {}'.format(i + 1)]) for i in range(args.tasks - 1)]
            FWTs = [
                0. if args.use_exemplars else
                (R.loc['after task {}'.format(i + 1),
                       'task {}'.format(i + 2)] -
                 R.loc['at start', 'task {}'.format(i + 2)])
                for i in range(args.tasks - 1)
            ]
            forgetting = []
            for i in range(args.tasks - 1):
                forgetting.append(
                    max(R.iloc[1:args.tasks, i]) - R.iloc[args.tasks, i])
            R.loc['FWT (per task)'] = ['NA'] + FWTs
            R.loc['BWT (per task)'] = BWTs + ['NA']
            R.loc['F (per task)'] = forgetting + ['NA']
            BWT = sum(BWTs) / (args.tasks - 1)
            F = sum(forgetting) / (args.tasks - 1)
            FWT = sum(FWTs) / (args.tasks - 1)
            metrics_dict['BWT'] = BWT
            metrics_dict['F'] = F
            metrics_dict['FWT'] = FWT
            # -print on screen
            if verbose:
                print(
                    "\nAccuracy matrix, based on all classes up to the evaluated task"
                )
                print(R)
                print("\n=> FWT = {:.4f}".format(FWT))
                print("=> BWT = {:.4f}".format(BWT))
                print("=>  F = {:.4f}\n".format(F))

    if verbose and args.time:
        print(
            "=> Total training time = {:.1f} seconds\n".format(training_time))

    # -------------------------------------------------------------------------------------------------#

    # ------------------#
    # ----- OUTPUT -----#
    # ------------------#

    # Average precision on full test set
    output_file = open("{}/prec-{}.txt".format(args.r_dir, param_stamp), 'w')
    output_file.write('{}\n'.format(
        average_precs_ex if args.use_exemplars else average_precs))
    output_file.close()
    # -metrics-dict
    if args.metrics:
        file_name = "{}/dict-{}".format(args.r_dir, param_stamp)
        utils.save_object(metrics_dict, file_name)

    # -------------------------------------------------------------------------------------------------#

    # --------------------#
    # ----- PLOTTING -----#
    # --------------------#

    # If requested, generate pdf
    if args.pdf:
        # -open pdf
        plot_name = "{}/{}.pdf".format(args.p_dir, param_stamp)
        pp = visual_plt.open_pdf(plot_name)

        # -show samples and reconstructions (either from main model or from separate generator)
        if args.feedback or args.replay == "generative":
            evaluate.show_samples(model if args.feedback else generator,
                                  config,
                                  size=args.sample_n,
                                  pdf=pp)
            for i in range(args.tasks):
                evaluate.show_reconstruction(
                    model if args.feedback else generator,
                    test_datasets[i],
                    config,
                    pdf=pp,
                    task=i + 1)

        # -show metrics reflecting progression during training
        figure_list = []  # -> create list to store all figures to be plotted

        # -generate all figures (and store them in [figure_list])
        key = "acc per task ({} task)".format(
            "all classes up to trained" if scenario ==
            'class' else "only classes in")
        plot_list = []
        for i in range(args.tasks):
            plot_list.append(metrics_dict[key]["task {}".format(i + 1)])
        figure = visual_plt.plot_lines(
            plot_list,
            x_axes=metrics_dict["x_task"],
            line_names=['task {}'.format(i + 1) for i in range(args.tasks)])
        figure_list.append(figure)
        figure = visual_plt.plot_lines([metrics_dict["average"]],
                                       x_axes=metrics_dict["x_task"],
                                       line_names=['average all tasks so far'])
        figure_list.append(figure)

        # -add figures to pdf (and close this pdf).
        for figure in figure_list:
            pp.savefig(figure)

        # -close pdf
        pp.close()

        # -print name of generated plot on screen
        if verbose:
            print("\nGenerated plot: {}\n".format(plot_name))
def run(args):

    if not args.single_test:
        import pidfile
        resfile = pidfile.exclusive_dirfn(
            os.path.join(args.r_dir, args.save_dir))

    if args.log_per_task:
        args.prec_log = args.iters
        args.loss_log = args.iters

    # -create plots- and results-directories if needed
    if not os.path.isdir(args.r_dir):
        os.mkdir(args.r_dir)
    if args.pdf and not os.path.isdir(args.p_dir):
        os.mkdir(args.p_dir)

    # set cuda
    cuda = torch.cuda.is_available() and args.cuda
    device = torch.device("cuda" if cuda else "cpu")

    # set random seeds
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if cuda:
        torch.cuda.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)

    scenario = args.scenario

    #-------------------------------------------------------------------------------------------------
    # DATA
    #-------------------------------------------------------------------------------------------------
    (train_datasets, test_datasets), config = get_multitask_experiment(
        args,
        name=args.experiment,
        scenario=scenario,
        tasks=args.tasks,
        data_dir=args.d_dir,
        verbose=True,
        exception=(args.seed == 0),
    )
    args.tasks = len(config['labels_per_task'])
    args.labels_per_task = config['labels_per_task']
    if not args.task_boundary:
        args.iterations_per_virtual_epc = config['iterations_per_virtual_epc']
        args.task_dict = config['task_dict']

    #-------------------------------------------------------------------------------------------------
    # MODEL
    #-------------------------------------------------------------------------------------------------
    if args.ebm:
        model = EBM(args,
                    image_size=config['size'],
                    image_channels=config['channels'],
                    classes=config['num_classes'],
                    fc_units=args.fc_units).to(device)
    else:
        model = Classifier(args,
                           image_size=config['size'],
                           image_channels=config['channels'],
                           classes=config['num_classes'],
                           fc_units=args.fc_units).to(device)

    if args.experiment == 'cifar100':
        model = utils.init_params(model, args)
        for param in model.convE.parameters():
            param.requires_grad = False

    if args.pretrain:
        checkpoint = torch.load(args.pretrain)
        best_acc = checkpoint['best_acc']
        checkpoint_state = checkpoint['state_dict']

        print(
            '-----------------------------------------------------------------------------'
        )
        print('load pretrained model %s' % args.pretrain)
        print('best_acc', best_acc)
        print(
            '-----------------------------------------------------------------------------'
        )

        model_dict = model.fcE.state_dict()
        checkpoint_state = {
            k[7:]: v
            for k, v in checkpoint_state.items() if k[7:] in model_dict
        }  # strip the 'module.' prefix added by nn.DataParallel
        del checkpoint_state['classifier.weight']
        del checkpoint_state['classifier.bias']
        if 'y_ebm.weight' in checkpoint_state:
            del checkpoint_state['y_ebm.weight']
        model_dict.update(checkpoint_state)
        model.fcE.load_state_dict(model_dict)

        for param in model.fcE.model.parameters():
            param.requires_grad = False

    model.optim_list = [{
        'params': filter(lambda p: p.requires_grad, model.parameters()),
        'lr': args.lr
    }]
    model.optim_type = args.optimizer

    if model.optim_type in ("adam", "adam_reset"):
        model.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999))
    elif model.optim_type == "sgd":
        model.optimizer = optim.SGD(model.optim_list)
    else:
        raise ValueError(
            "Unrecognized optimizer, '{}' is not currently a valid option".
            format(args.optimizer))

    #-------------------------------------------------------------------------------------------------
    # CL-STRATEGY: ALLOCATION
    #-------------------------------------------------------------------------------------------------

    # Elastic Weight Consolidation (EWC)
    if isinstance(model, ContinualLearner):
        model.ewc_lambda = args.ewc_lambda if args.ewc else 0
        if args.ewc:
            model.fisher_n = args.fisher_n
            model.gamma = args.gamma
            model.online = args.online
            model.emp_FI = args.emp_fi

    # Synaptic Intelligence (SI)
    if isinstance(model, ContinualLearner):
        model.si_c = args.si_c if args.si else 0
        if args.si:
            model.epsilon = args.epsilon

    #-------------------------------------------------------------------------------------------------
    # Get parameter-stamp (and print on screen)
    #-------------------------------------------------------------------------------------------------
    param_stamp = get_param_stamp(args, model.name, verbose=True)
    param_stamp = param_stamp + '--' + args.model_name

    # -define [precision_dict] to keep track of performance during training for storing and for later plotting in pdf
    precision_dict = evaluate.initiate_precision_dict(args.tasks)

    #-------------------------------------------------------------------------------------------------#

    #---------------------#
    #----- CALLBACKS -----#
    #---------------------#
    solver_loss_cbs = [
        cb._solver_loss_cb(log=args.loss_log,
                           model=model,
                           tasks=args.tasks,
                           iters_per_task=args.iters)
    ]

    eval_cb = cb._eval_cb(log=args.prec_log,
                          test_datasets=test_datasets,
                          visdom=args.visdom,
                          precision_dict=None,
                          iters_per_task=args.iters,
                          test_size=args.prec_n,
                          labels_per_task=config['labels_per_task'],
                          scenario=scenario)
    eval_cb_full = cb._eval_cb(log=args.iters,
                               test_datasets=test_datasets,
                               precision_dict=precision_dict,
                               iters_per_task=args.iters,
                               labels_per_task=config['labels_per_task'],
                               scenario=scenario)
    eval_cbs = [eval_cb, eval_cb_full]

    #-------------------------------------------------------------------------------------------------
    # TRAINING
    #-------------------------------------------------------------------------------------------------
    print("--> Training:")
    start = time.time()

    if args.task_boundary:
        train_cl(args,
                 model,
                 train_datasets,
                 scenario=scenario,
                 labels_per_task=config['labels_per_task'],
                 iters=args.iters,
                 batch_size=args.batch,
                 eval_cbs=eval_cbs,
                 loss_cbs=solver_loss_cbs)
    else:
        train_cl_noboundary(args,
                            model,
                            train_datasets,
                            scenario=scenario,
                            labels_per_task=config['labels_per_task'],
                            iters=args.iters,
                            batch_size=args.batch,
                            eval_cbs=eval_cbs,
                            loss_cbs=solver_loss_cbs)

    training_time = time.time() - start

    #-------------------------------------------------------------------------------------------------
    # EVALUATION
    #-------------------------------------------------------------------------------------------------
    print("\n\n--> Evaluation ({}-incremental learning scenario):".format(
        args.scenario))
    if args.ebm:
        precs = [
            evaluate.validate_ebm(args,
                                  model,
                                  test_datasets[i],
                                  verbose=False,
                                  test_size=None,
                                  task=i + 1,
                                  with_exemplars=False,
                                  current_task=args.tasks)
            for i in range(args.tasks)
        ]
    else:
        precs = [
            evaluate.validate(args,
                              model,
                              test_datasets[i],
                              verbose=False,
                              test_size=None,
                              task=i + 1,
                              with_exemplars=False,
                              current_task=args.tasks)
            for i in range(args.tasks)
        ]

    print("\n Precision on test-set (softmax classification):")
    for i in range(args.tasks):
        print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
    average_precs = sum(precs) / args.tasks
    print('average precision over all {} tasks: {:.4f}'.format(
        args.tasks, average_precs))

    #-------------------------------------------------------------------------------------------------
    # OUTPUT
    #-------------------------------------------------------------------------------------------------
    if not os.path.exists(os.path.join(args.r_dir, args.save_dir)):
        os.makedirs(os.path.join(args.r_dir, args.save_dir))

    output_file = open(
        "{}/{}/{}.txt".format(args.r_dir, args.save_dir, param_stamp), 'w')
    output_file.write("Training time {} \n".format(training_time))
    for i in range(args.tasks):
        output_file.write(" - Task {}: {:.4f}".format(i + 1, precs[i]))
        output_file.write("\n")
    output_file.write(' - Average {}\n'.format(average_precs))
    output_file.close()
    file_name = "{}/{}/{}".format(args.r_dir, args.save_dir, param_stamp)
    utils.save_object(precision_dict, file_name)

    if args.pdf:
        pp = visual_plt.open_pdf("{}/{}/{}.pdf".format(args.r_dir,
                                                       args.save_dir,
                                                       param_stamp))
        # -show metrics reflecting progression during training
        figure_list = []  #-> create list to store all figures to be plotted
        # -generate all figures (and store them in [figure_list])
        figure = visual_plt.plot_lines(
            precision_dict["all_tasks"],
            x_axes=precision_dict["x_task"],
            line_names=['task {}'.format(i + 1) for i in range(args.tasks)])
        figure_list.append(figure)
        figure = visual_plt.plot_lines([precision_dict["average"]],
                                       x_axes=precision_dict["x_task"],
                                       line_names=['average all tasks so far'])
        figure_list.append(figure)
        # -add figures to pdf (and close this pdf).
        for figure in figure_list:
            pp.savefig(figure)

        pp.close()

    if not args.single_test:
        resfile.done()
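
This variant of run() guards its results directory with an exclusive lock, so concurrent jobs writing to the same directory fail fast instead of clobbering each other. A minimal sketch of that pattern, assuming the same pidfile helper used above:

import os
import pidfile

# acquire an exclusive lock on the results directory; if another process
# already holds it, this refuses to proceed (same call as in run() above)
resfile = pidfile.exclusive_dirfn(os.path.join('results', 'my_experiment'))
try:
    pass  # ... run the experiment ...
finally:
    resfile.done()  # mark the job as finished and release the lock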
def train_and_evaluate(cfg):

    # Training settings
    experiment_dir = os.path.join('experiments', cfg.exp_type, cfg.save_dir)
    if not os.path.exists(experiment_dir):
        os.makedirs(experiment_dir)

    utils.set_logger(os.path.join(experiment_dir, cfg.log))
    logging.info('-----------Starting Experiment------------')
    use_cuda = cfg.use_cuda and torch.cuda.is_available()
    cfg.use_cuda = use_cuda
    device = torch.device(
        "cuda:{}".format(cfg.cuda_num) if use_cuda else "cpu")
    # initialize the tensorboard summary writer
    writer = SummaryWriter(experiment_dir + '/tboard')

    ## get the dataloaders
    dloader_train, dloader_val, dloader_test = dataloaders.get_dataloaders(
        cfg, val_split=.2)

    # Load the model
    model = models.get_model(cfg)

    if cfg.ssl_pretrained_exp_path:
        ssl_exp_dir = os.path.join('experiments',
                                   'self-supervised', cfg.ssl_pretrained_exp_path)
        state_dict = torch.load(os.path.join(ssl_exp_dir, cfg.ssl_weight),
                                map_location=device)
        # the stored dict has 3 entries - epoch, state_dict and optimizer
        state_dict = state_dict['state_dict']
        # drop the classification head; strict=False below tolerates the missing keys
        del state_dict['fc.weight']
        del state_dict['fc.bias']

        model.load_state_dict(state_dict, strict=False)

    # Only finetune fc layer
    for name, param in model.named_parameters():
        if 'fc' not in name:
            param.requires_grad = False

    model = model.to(device)

    images, _, _, _ = next(iter(dloader_train))
    images = images.to(device)
    writer.add_graph(model, images)

    # follow the same setting as RotNet paper
    optimizer = optim.SGD(model.parameters(),
                          lr=float(cfg.lr),
                          momentum=float(cfg.momentum),
                          weight_decay=5e-4,
                          nesterov=True)
    if cfg.scheduler:
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             milestones=[60, 120, 160, 200],
                                             gamma=0.2)
    else:
        scheduler = None
    criterion = nn.CrossEntropyLoss()

    best_loss = 1000
    for epoch in range(cfg.num_epochs + 1):

        logging.info('\nTrain for Epoch: {}/{}'.format(epoch, cfg.num_epochs))
        train_loss, train_acc = train(epoch, model, device, dloader_train,
                                      optimizer, scheduler, criterion,
                                      experiment_dir, writer)

        # validate after every epoch
        logging.info('\nValidate for Epoch: {}/{}'.format(
            epoch, cfg.num_epochs))
        val_loss, val_acc = validate(epoch, model, device, dloader_val,
                                     criterion, experiment_dir, writer)
        logging.info(
            'Val Epoch: {} Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(
                epoch + 1, val_loss, val_acc))

        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        if epoch % cfg.save_intermediate_weights == 0:
            utils.save_checkpoint(
                {'Epoch': epoch,
                 'state_dict': model.state_dict(),
                 'optim_dict': optimizer.state_dict()},
                is_best, experiment_dir,
                checkpoint='{}_{}rot_epoch{}_checkpoint.pth'.format(
                    cfg.network.lower(), str(cfg.num_rot), str(epoch)),
                best_model='{}_{}rot_epoch{}_best.pth'.format(
                    cfg.network.lower(), str(cfg.num_rot), str(epoch)))
    writer.close()

    logging.info('\nEvaluate on test')
    test_loss, test_acc = test(model, device, dloader_test, criterion,
                               experiment_dir)
    logging.info('Test: Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(
        test_loss, test_acc))

    # save the configuration file within that experiment directory
    utils.save_yaml(cfg,
                    save_path=os.path.join(experiment_dir, 'config_sl.yaml'))
    logging.info('-----------End of Experiment------------')
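
When most of the backbone is frozen like this, it can be cleaner to hand the optimizer only the parameters that still require gradients, as the continual-learning run() above does with its optim_list. A minimal sketch:

import torch.nn as nn
import torch.optim as optim

model = nn.Sequential(nn.Linear(512, 512), nn.ReLU(), nn.Linear(512, 5))
for p in model[0].parameters():  # freeze everything except the final layer
    p.requires_grad = False

# pass only the still-trainable parameters to the optimizer
trainable = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(trainable, lr=0.1, momentum=0.9,
                      weight_decay=5e-4, nesterov=True)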
def test_valid(self):
    fileName = self.param
    print(fileName)
    self.assertEqual(validate(fileName), 0)
def train(model,
          device,
          train_loader,
          val_loader,
          test_loader,
          optimizer,
          epoch,
          batch_log_interval=10,
          patience=20,
          min_delta=0.003):
    """
    This function runs the training script of the model

    Args:
        model (obj): which model to train
        device (torch.device): device to run on, cpu or whether to enable cuda
        train_loader (torch.utils.data.dataloader.DataLoader): dataloader object
        val_loader (torch.utils.data.dataloader.DataLoader): dataloader object for validation
        test_loader (torch.utils.data.dataloader.DataLoader): dataloader object for testing at
            would-be early stopping iteration
        optimizer (torch.optim obj): which optimizer to use
        epoch (int): which epoch we're on
        batch_log_interval (int): how often to log results
        patience (int): how many iterations/batches (not epochs) we will tolerate a val_loss improvement < min_delta
        min_delta (float): early stopping threshold; if val_loss < min_delta for patience # of iterations, we consider
            early stopping to have occurred

    Returns:
        stop (bool): whether early stopping would have occurred
    """
    print('min_delta:', min_delta)
    no_improvement_count = 0
    stop = False
    print('IN TRAIN, DEVICE:', device)

    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = model.loss(output, target)
        loss.backward()
        optimizer.step()

        # for each "iteration" (assuming that means batch), get validation loss for early stopping
        if batch_idx == 0:
            val_loss = validate(model, device, val_loader)
            prev_val_loss = val_loss
        else:
            prev_val_loss = val_loss
            val_loss = validate(model, device, val_loader)

        # if no improvement on this batch
        if abs(prev_val_loss - val_loss) < min_delta:
            no_improvement_count += 1
        else:
            no_improvement_count = 0

        # trigger early stopping
        if no_improvement_count == patience:
            print(
                'Early Stopping Triggered at iteration {} within epoch. Done Training. val_loss = {:.6f}, prev_val_loss = {:.6f}'
                .format(batch_idx, val_loss, prev_val_loss))
            test(model, device, test_loader)
            stop = True
            return stop, batch_idx

        if batch_idx % batch_log_interval == 0:
            print(
                'Train Epoch: {} [{}/{} ({:.0f}%)]\tTrain Loss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))

    return stop, None
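
The early-stopping rule above counts consecutive batches whose validation-loss change stays below min_delta. The same logic in isolation, on synthetic loss values (made up for illustration):

def should_stop(val_losses, patience=3, min_delta=0.003):
    no_improvement = 0
    prev = val_losses[0]
    for loss in val_losses[1:]:
        if abs(prev - loss) < min_delta:  # a change below min_delta counts as "no improvement"
            no_improvement += 1
        else:
            no_improvement = 0
        if no_improvement == patience:
            return True
        prev = loss
    return False

print(should_stop([0.50, 0.40, 0.399, 0.398, 0.3985]))  # -> True (3 stagnant steps)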
Example #8
0
def train_and_evaluate(cfg):
    
    # Training settings
    experiment_dir = os.path.join('experiments', cfg.exp_type, cfg.save_dir)
    if not os.path.exists(experiment_dir):
        os.makedirs(experiment_dir)

    utils.set_logger(os.path.join(experiment_dir, cfg.log))
    logging.info('-----------Starting Experiment------------')
    use_cuda = cfg.use_cuda and torch.cuda.is_available()
    cfg.use_cuda = use_cuda
    device = torch.device("cuda:{}".format(cfg.cuda_num) if use_cuda else "cpu")
    # initialize the tensorboard summary writer
    logs = os.path.join('experiments', cfg.exp_type, 'tboard_sup_demo')
    writer = SummaryWriter(logs + '/rotnet_without_pretrain')

    ## get the dataloaders
    dloader_train, dloader_val, dloader_test = dataloaders.get_dataloaders(cfg)
    
    # Load the model
    model = models.get_model(cfg)
    
    if cfg.use_pretrained:
        pretrained_path = os.path.join('experiments', 'supervised',
                                       cfg.pretrained_dir, cfg.pretrained_weights)
        state_dict = torch.load(pretrained_path, map_location=device)
        model.load_state_dict(state_dict, strict=False)
        logging.info('loading pretrained_weights {}'.format(cfg.pretrained_weights))

    if cfg.use_ssl:
        ssl_exp_dir = os.path.join('experiments',
                                   'self-supervised', cfg.ssl_pretrained_exp_path)
        state_dict = torch.load(os.path.join(ssl_exp_dir, cfg.ssl_weight),
                                map_location=device)
        # the stored dict has 3 entries - epoch, state_dict and optimizer
        state_dict = state_dict['state_dict']
        print(state_dict.keys())
        del state_dict['fc.weight']
        del state_dict['fc.bias']

        del state_dict['layer4.0.conv1.weight']
        del state_dict['layer4.0.conv2.weight']
        del state_dict['layer4.1.conv1.weight']
        del state_dict['layer4.1.conv2.weight']

        del state_dict['layer3.0.conv1.weight']
        del state_dict['layer3.0.conv2.weight']
        del state_dict['layer3.1.conv1.weight']
        del state_dict['layer3.1.conv2.weight']


        # (the corresponding layer2 conv weights could be removed here as well)

        model.load_state_dict(state_dict, strict=False)
    
        # Only finetune the fc layer and the later conv blocks
        finetune_layers = ('fc', 'layer3.0.conv', 'layer3.1.conv',
                           'layer4.0.conv', 'layer4.1.conv')
        for name, param in model.named_parameters():
            # train a parameter only if its name matches one of the chosen layers
            param.requires_grad = any(l in name for l in finetune_layers)
    
    model = model.to(device)

    images, _, _, _ = next(iter(dloader_train))
    images = images.to(device)
    writer.add_graph(model, images)

    # follow the same setting as the RotNet paper
    if cfg.opt == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=float(cfg.lr),
                              momentum=float(cfg.momentum),
                              weight_decay=5e-4, nesterov=True)
    elif cfg.opt == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=float(cfg.lr))

    if cfg.scheduler:
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             milestones=[60, 120, 160, 200],
                                             gamma=0.2)
    else:
        scheduler = None
    criterion = nn.CrossEntropyLoss()
    
    global iter_cnt
    iter_cnt=0
    best_loss = 1000
    for epoch in range(cfg.num_epochs):
        
        logging.info('\nTrain for Epoch: {}/{}'.format(epoch, cfg.num_epochs))
        train_loss, train_acc = train(epoch, model, device, dloader_train,
                                      optimizer, scheduler, criterion,
                                      experiment_dir, writer)
        
        # validate after every epoch
        logging.info('\nValidate for Epoch: {}/{}'.format(epoch, cfg.num_epochs))
        val_loss, val_acc = validate(epoch, model, device, dloader_val,
                                     criterion, experiment_dir, writer)
        logging.info('Val Epoch: {} Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(
            epoch, val_loss, val_acc))
        
        is_best = val_loss < best_loss
        best_loss = min(val_loss, best_loss)
        if epoch % cfg.save_intermediate_weights == 0 or is_best:
            utils.save_checkpoint(
                {'Epoch': epoch,
                 'state_dict': model.state_dict(),
                 'optim_dict': optimizer.state_dict()},
                is_best, experiment_dir,
                checkpoint='{}_epoch{}_checkpoint.pth'.format(
                    cfg.network.lower(), str(epoch)),
                best_model='{}_best.pth'.format(cfg.network.lower()))
    writer.close()
    
    logging.info('\nEvaluate test result on best ckpt')
    state_dict = torch.load(os.path.join(experiment_dir,
                                         '{}_best.pth'.format(cfg.network.lower())),
                            map_location=device)
    model.load_state_dict(state_dict, strict=False)

    test_loss, test_acc = test(model, device, dloader_test, criterion,
                               experiment_dir)
    logging.info('Test: Avg Loss: {:.4f} \t Avg Acc: {:.4f}'.format(
        test_loss, test_acc))

    # save the configuration file within that experiment directory
    utils.save_yaml(cfg, save_path=os.path.join(experiment_dir, 'config_sl.yaml'))
    logging.info('-----------End of Experiment------------')
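
Both variants above load partial checkpoints with strict=False after deleting the keys that should be re-learned. A minimal sketch of that pattern (the checkpoint path and its {'state_dict': ...} layout are assumptions mirroring the code above):

import torch
import torchvision.models as models

model = models.resnet18(num_classes=5)
checkpoint = torch.load('ssl_checkpoint.pth', map_location='cpu')  # hypothetical path
state_dict = checkpoint['state_dict']

# drop the head that must be re-learned; strict=False then tolerates the missing keys
for k in ('fc.weight', 'fc.bias'):
    state_dict.pop(k, None)
model.load_state_dict(state_dict, strict=False)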
Example #9
0
def train_cl(model, train_datasets, test_datasets, replay_mode="none", classes_per_task=None,
             iters=2000, batch_size=32,
             generator=None, gen_iters=0, gen_loss_cbs=list(), loss_cbs=list(), eval_cbs=list(), sample_cbs=list(),
             use_exemplars=True, add_exemplars=False, eval_cbs_exemplars=list(), savepath='./'):
    '''Train a model (with a "train_a_batch" method) on multiple tasks, with replay-strategy specified by [replay_mode].

    [model]             <nn.Module> main model to optimize across all tasks
    [train_datasets]    <list> with for each task the training <DataSet>
    [replay_mode]       <str>, choice from "generative", "exact", "current", "offline" and "none"
    [classes_per_task]  <int>, # of classes per task
    [iters]             <int>, # of optimization-steps (i.e., # of batches) per task
    [generator]         None or <nn.Module>, if a separate generative model should be trained (for [gen_iters] per task)
    [*_cbs]             <list> of call-back functions to evaluate training-progress'''

    # Set model in training-mode
    model.train()

    # Use cuda?
    cuda = model._is_on_cuda()
    device = model._device()

    # Initiate possible sources for replay (no replay for 1st task)
    Exact = Generative = Current = False
    previous_model = None

    # Lists to collect the per-task test precisions (top-1 and top-5) measured after each task
    output = []
    output5 = []

    # Register starting param-values (needed for "intelligent synapses").
    if isinstance(model, ContinualLearner) and (model.si_c > 0):
        for n, p in model.named_parameters():
            if p.requires_grad:
                n = n.replace('.', '__')
                model.register_buffer('{}_SI_prev_task'.format(n), p.data.clone())

    # Loop over all tasks.
    for task, train_dataset in enumerate(train_datasets, 1):
        # If offline replay-setting, create large database of all tasks so far
        if replay_mode == "offline":
            train_dataset = ConcatDataset(train_datasets[:task])

        # Add exemplars (if available) to current dataset (if requested)
        if add_exemplars and task > 1:
            # ---------- ADHOC SOLUTION: permMNIST needs transform to tensor, while splitMNIST does not ---------- #
            if len(train_datasets) > 6:
                target_transform = lambda y, x=classes_per_task: torch.tensor(y % x)
            else:
                target_transform = lambda y, x=classes_per_task: y % x
            # ---------------------------------------------------------------------------------------------------- #
            exemplar_dataset = ExemplarDataset(model.exemplar_sets, target_transform=target_transform)
            training_dataset = ConcatDataset([train_dataset, exemplar_dataset])
        else:
            training_dataset = train_dataset

        # Prepare <dicts> to store running importance estimates and param-values before update ("Synaptic Intelligence")
        if isinstance(model, ContinualLearner) and (model.si_c > 0):
            W = {}
            p_old = {}
            for n, p in model.named_parameters():
                if p.requires_grad:
                    n = n.replace('.', '__')
                    W[n] = p.data.clone().zero_()
                    p_old[n] = p.data.clone()

        # Find [active_classes]
        active_classes = None  # -> for Domain-IL scenario, always all classes are active

        # Reset state of optimizer(s) for every task (if requested)
        if model.optim_type == "adam_reset":
            model.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999))
        if (generator is not None) and generator.optim_type == "adam_reset":
            generator.optimizer = optim.Adam(generator.optim_list, betas=(0.9, 0.999))

        # Initialize # iters left on current data-loader(s)
        iters_left = iters_left_previous = 1

        # Define tqdm progress bar(s)
        progress = tqdm.tqdm(range(1, iters + 1))
        if generator is not None:
            progress_gen = tqdm.tqdm(range(1, gen_iters + 1))

        # Loop over all iterations
        iters_to_use = iters if (generator is None) else max(iters, gen_iters)
        for batch_index in range(1, iters_to_use + 1):

            # Update # iters left on current data-loader(s) and, if needed, create new one(s)
            iters_left -= 1
            if iters_left == 0:
                data_loader = iter(utils.get_data_loader(training_dataset, batch_size, cuda=cuda, drop_last=True))
                # NOTE:  [train_dataset]  is training-set of current task
                #      [training_dataset] is training-set of current task with stored exemplars added (if requested)
                iters_left = len(data_loader)
            if Exact:
                iters_left_previous -= 1
                if iters_left_previous == 0:
                    batch_size_to_use = min(batch_size, len(ConcatDataset(previous_datasets)))
                    data_loader_previous = iter(utils.get_data_loader(ConcatDataset(previous_datasets),
                                                                      batch_size_to_use, cuda=cuda, drop_last=True))
                    iters_left_previous = len(data_loader_previous)

            # -----------------Collect data------------------#

            #####-----CURRENT BATCH-----#####
            x, y = next(data_loader)  # --> sample training data of current task
            x, y = x.to(device), y.to(device)  # --> transfer them to correct device

            scores = None

            #####-----REPLAYED BATCH-----#####
            if not Exact and not Generative and not Current:
                x_ = y_ = scores_ = None  # -> if no replay

            ##-->> Exact Replay <<--##
            if Exact:
                scores_ = None

                # Sample replayed training data, move to correct device
                x_, y_ = next(data_loader_previous)
                x_ = x_.to(device)
                y_ = y_.to(device) if (model.replay_targets == "hard") else None
                # If required, get target scores (i.e., [scores_]) -- using previous model, with no_grad()
                if (model.replay_targets == "soft"):
                    with torch.no_grad():
                        scores_ = previous_model(x_)

            ##-->> Generative / Current Replay <<--##
            if Generative or Current:
                # Get replayed data (i.e., [x_]) -- either current data or use previous generator
                x_ = x if Current else previous_generator.sample(batch_size)

                # Get target scores and labels (i.e., [scores_] / [y_]) -- using previous model, with no_grad()
                with torch.no_grad():
                    all_scores_ = previous_model(x_)
                scores_ = all_scores_
                _, y_ = torch.max(scores_, dim=1)

                # Only keep predicted y/scores if required (as otherwise unnecessary computations will be done)
                y_ = y_ if (model.replay_targets == "hard") else None
                scores_ = scores_ if (model.replay_targets == "soft") else None

            # ---> Train MAIN MODEL
            if batch_index <= iters:

                # Train the main model with this batch
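                # ([rnt]=1/task presumably weights the current task's loss against the replay loss)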
                loss_dict = model.train_a_batch(x, y, x_=x_, y_=y_, scores=scores, scores_=scores_,
                                                active_classes=active_classes, rnt=1. / task)

                # Update running parameter importance estimates in W
                if isinstance(model, ContinualLearner) and (model.si_c > 0):
                    for n, p in model.named_parameters():
                        if p.requires_grad:
                            n = n.replace('.', '__')
                            if p.grad is not None:
                                W[n].add_(-p.grad * (p.detach() - p_old[n]))
                            p_old[n] = p.detach().clone()

                # Fire callbacks (for visualization of training-progress / evaluating performance after each task)
                for loss_cb in loss_cbs:
                    if loss_cb is not None:
                        loss_cb(progress, batch_index, loss_dict, task=task)
                for eval_cb in eval_cbs:
                    if eval_cb is not None:
                        eval_cb(model, batch_index, task=task)
                if model.label == "VAE":
                    for sample_cb in sample_cbs:
                        if sample_cb is not None:
                            sample_cb(model, batch_index, task=task)

            # ---> Train GENERATOR
            if generator is not None and batch_index <= gen_iters:
                # Train the generator with this batch
                loss_dict = generator.train_a_batch(x, y, x_=x_, y_=y_, scores_=scores_, active_classes=active_classes,
                                                    rnt=1. / task)

                # Fire callbacks on each iteration
                for loss_cb in gen_loss_cbs:
                    if loss_cb is not None:
                        loss_cb(progress_gen, batch_index, loss_dict, task=task)
                for sample_cb in sample_cbs:
                    if sample_cb is not None:
                        sample_cb(generator, batch_index, task=task)

        ##----------> UPON FINISHING EACH TASK...

        # Close progress bar(s)
        progress.close()
        if generator is not None:
            progress_gen.close()

        # EWC: estimate Fisher Information matrix (FIM) and update term for quadratic penalty
        if isinstance(model, ContinualLearner) and (model.ewc_lambda > 0):
            # -find allowed classes
            allowed_classes = None

            # -estimate FI-matrix
            model.estimate_fisher(training_dataset, allowed_classes=allowed_classes)

        # SI: calculate and update the normalized path integral
        if isinstance(model, ContinualLearner) and (model.si_c > 0):
            model.update_omega(W, model.epsilon)

        # EXEMPLARS: update exemplar sets
        if (add_exemplars or use_exemplars) or replay_mode == "exemplars":
            exemplars_per_class = int(np.floor(model.memory_budget / (classes_per_task * task)))
            # reduce exemplar-sets
            model.reduce_exemplar_sets(exemplars_per_class)
            # for each new class trained on, construct an exemplar-set
            new_classes = list(range(classes_per_task))
            for class_id in new_classes:
                start = time.time()
                # create new dataset containing only all examples of this class
                class_dataset = SubDataset(original_dataset=train_dataset, sub_labels=[class_id])
                # based on this dataset, construct new exemplar-set for this class
                model.construct_exemplar_set(dataset=class_dataset, n=exemplars_per_class)
                print("Constructed exemplar-set for class {}: {} seconds".format(class_id, round(time.time() - start)))
            model.compute_means = True
            # evaluate this way of classifying on test set
            for eval_cb in eval_cbs_exemplars:
                if eval_cb is not None:
                    eval_cb(model, iters, task=task)

        # REPLAY: update source for replay
        previous_model = copy.deepcopy(model).eval()
        if replay_mode == 'generative':
            Generative = True
            previous_generator = copy.deepcopy(generator).eval() if generator is not None else previous_model
        elif replay_mode == 'current':
            Current = True
        elif replay_mode in ('exemplars', 'exact'):
            Exact = True
            if replay_mode == "exact":
                previous_datasets = train_datasets[:task]
            else:
                target_transform = (lambda y, x=classes_per_task: y % x)
                previous_datasets = [
                    ExemplarDataset(model.exemplar_sets, target_transform=target_transform)]

        precs = [evaluate.validate(
            model, test_datasets[i], verbose=False, test_size=None, task=i + 1, with_exemplars=False,
            allowed_classes=None
        ) for i in range(len(test_datasets))]
        output.append(precs)

        precs5 = [evaluate.validate5(
            model, test_datasets[i], verbose=False, test_size=None, task=i + 1, with_exemplars=False,
            allowed_classes=None
        ) for i in range(len(test_datasets))]
        output5.append(precs5)

    os.makedirs(savepath + '/top5', exist_ok=True)
    # use a single timestamp so the top-1 and top-5 files get matching names
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
    savepath1 = savepath + '/' + timestamp + '.csv'
    f = open(savepath1, 'w')
    writer = csv.writer(f)
    writer.writerows(output)
    f.close()
    savepath5 = savepath + '/top5/' + timestamp + '.csv'
    f = open(savepath5, 'w')
    writer = csv.writer(f)
    writer.writerows(output5)
    f.close()
    print(savepath)
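
Note how the exemplar budget in the loop above is split evenly over all classes seen so far, so each class's exemplar-set shrinks as training progresses. A quick worked example with a hypothetical budget of 2000:

import numpy as np

memory_budget = 2000  # hypothetical total budget
classes_per_task = 10
for task in range(1, 6):
    exemplars_per_class = int(np.floor(memory_budget / (classes_per_task * task)))
    print('after task {}: {} exemplars per class'.format(task, exemplars_per_class))
# -> 200, 100, 66, 50, 40 exemplars per class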
Example #10
0
def train_cl(model, train_datasets, replay_mode="none", scenario="class", classes_per_task=None, iters=2000,
             batch_size=32, generator=None, gen_iters=0, gen_loss_cbs=list(), loss_cbs=list(), eval_cbs=list(),
             sample_cbs=list(), use_exemplars=True, add_exemplars=False, eval_cbs_exemplars=list(), sparsity=0.,
             x_tasks=5, test_datasets=None):
    '''Train a model (with a "train_a_batch" method) on multiple tasks, with replay-strategy specified by [replay_mode].

    [model]             <nn.Module> main model to optimize across all tasks
    [train_datasets]    <list> with for each task the training <DataSet>
    [replay_mode]       <str>, choice from "generative", "exact", "current", "offline" and "none"
    [scenario]          <str>, choice from "task", "domain" and "class"
    [classes_per_task]  <int>, # of classes per task
    [iters]             <int>, # of optimization-steps (i.e., # of batches) per task
    [generator]         None or <nn.Module>, if a separate generative model should be trained (for [gen_iters] per task)
    [*_cbs]             <list> of call-back functions to evaluate training-progress'''


    # Set model in training-mode
    model.train()

    # Use cuda?
    cuda = model._is_on_cuda()
    device = model._device()

    # Initiate possible sources for replay (no replay for 1st task)
    Exact = Generative = Current = False
    previous_model = None

    # Register starting param-values (needed for "intelligent synapses").
    if isinstance(model, ContinualLearner) and (model.si_c>0):
        for n, p in model.named_parameters():
            if p.requires_grad:
                n = n.replace('.', '__')
                model.register_buffer('{}_SI_prev_task'.format(n), p.data.clone())

    # Loop over all tasks.
    for task, train_dataset in enumerate(train_datasets, 1):

        # If offline replay-setting, create large database of all tasks so far
        if replay_mode=="offline" and (not scenario=="task"):
            train_dataset = ConcatDataset(train_datasets[:task])
        # -but if "offline"+"task"-scenario: all tasks so far included in 'exact replay' & no current batch
        if replay_mode=="offline" and scenario == "task":
            Exact = True
            previous_datasets = train_datasets

        # Add exemplars (if available) to current dataset (if requested)
        if add_exemplars and task>1:
            # ---------- ADHOC SOLUTION: permMNIST needs transform to tensor, while splitMNIST does not ---------- #
            if len(train_datasets)>6:
                target_transform = (lambda y, x=classes_per_task: torch.tensor(y%x)) if (
                        scenario=="domain"
                ) else (lambda y: torch.tensor(y))
            else:
                target_transform = (lambda y, x=classes_per_task: y%x) if scenario=="domain" else None
            # ---------------------------------------------------------------------------------------------------- #
            exemplar_dataset = ExemplarDataset(model.exemplar_sets, target_transform=target_transform)
            training_dataset = ConcatDataset([train_dataset, exemplar_dataset])
        else:
            training_dataset = train_dataset

        # Prepare <dicts> to store running importance estimates and param-values before update ("Synaptic Intelligence")
        if isinstance(model, ContinualLearner) and (model.si_c>0):
            W = {}
            p_old = {}
            for n, p in model.named_parameters():
                if p.requires_grad:
                    n = n.replace('.', '__')
                    W[n] = p.data.clone().zero_()
                    p_old[n] = p.data.clone()

        # Find [active_classes]
        active_classes = None  # -> for Domain-IL scenario, always all classes are active
        if scenario == "task":
            # -for Task-IL scenario, create <list> with for all tasks so far a <list> with the active classes
            active_classes = [list(range(classes_per_task * i, classes_per_task * (i + 1))) for i in range(task)]
        elif scenario == "class":
            # -for Class-IL scenario, create one <list> with active classes of all tasks so far
            active_classes = list(range(classes_per_task * task))

        # Reset state of optimizer(s) for every task (if requested)
        if model.optim_type=="adam_reset":
            model.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999))
        if (generator is not None) and generator.optim_type=="adam_reset":
            generator.optimizer = optim.Adam(generator.optim_list, betas=(0.9, 0.999))

        # Initialize # iters left on current data-loader(s)
        iters_left = iters_left_previous = 1
        if scenario=="task":
            up_to_task = task if replay_mode=="offline" else task-1
            iters_left_previous = [1]*up_to_task
            data_loader_previous = [None]*up_to_task

        # Define tqdm progress bar(s)
        progress = tqdm.tqdm(range(1, iters+1))
        if generator is not None:
            progress_gen = tqdm.tqdm(range(1, gen_iters+1))

        # Loop over all iterations
        iters_to_use = iters if (generator is None) else max(iters, gen_iters)
        for batch_index in range(1, iters_to_use+1):

            # Update # iters left on current data-loader(s) and, if needed, create new one(s)
            iters_left -= 1
            if iters_left==0:
                data_loader = iter(utils.get_data_loader(training_dataset, batch_size, cuda=cuda, drop_last=True))
                # NOTE:  [train_dataset]  is training-set of current task
                #      [training_dataset] is training-set of current task with stored exemplars added (if requested)
                iters_left = len(data_loader)
            if Exact:
                if scenario=="task":
                    up_to_task = task if replay_mode=="offline" else task-1
                    batch_size_replay = int(np.ceil(batch_size/up_to_task)) if (up_to_task>1) else batch_size
                    # -in Task-IL scenario, need separate replay for each task
                    for task_id in range(up_to_task):
                        batch_size_to_use = min(batch_size_replay, len(previous_datasets[task_id]))
                        iters_left_previous[task_id] -= 1
                        if iters_left_previous[task_id]==0:
                            data_loader_previous[task_id] = iter(utils.get_data_loader(
                                train_datasets[task_id], batch_size_to_use, cuda=cuda, drop_last=True
                            ))
                            iters_left_previous[task_id] = len(data_loader_previous[task_id])
                else:
                    iters_left_previous -= 1
                    if iters_left_previous==0:
                        batch_size_to_use = min(batch_size, len(ConcatDataset(previous_datasets)))
                        data_loader_previous = iter(utils.get_data_loader(ConcatDataset(previous_datasets),
                                                                          batch_size_to_use, cuda=cuda, drop_last=True))
                        iters_left_previous = len(data_loader_previous)


            # -----------------Collect data------------------#

            #####-----CURRENT BATCH-----#####
            if replay_mode=="offline" and scenario=="task":
                x = y = scores = None
            else:
                x, y = next(data_loader)                                    #--> sample training data of current task
                y = y-classes_per_task*(task-1) if scenario=="task" else y  #--> ITL: adjust y-targets to 'active range'
                x, y = x.to(device), y.to(device)                           #--> transfer them to correct device
                # If --bce, --bce-distill & scenario=="class", calculate scores of current batch with previous model
                binary_distillation = hasattr(model, "binaryCE") and model.binaryCE and model.binaryCE_distill
                if binary_distillation and scenario=="class" and (previous_model is not None):
                    with torch.no_grad():
                        scores = previous_model(x)[:, :(classes_per_task * (task - 1))]
                else:
                    scores = None


            #####-----REPLAYED BATCH-----#####
            if not Exact and not Generative and not Current:
                x_ = y_ = scores_ = None   #-> if no replay

            ##-->> Exact Replay <<--##
            if Exact:
                scores_ = None
                if scenario in ("domain", "class"):
                    # Sample replayed training data, move to correct device
                    x_, y_ = next(data_loader_previous)
                    x_ = x_.to(device)
                    y_ = y_.to(device) if (model.replay_targets=="hard") else None
                    # If required, get target scores (i.e., [scores_]) -- using previous model, with no_grad()
                    if (model.replay_targets=="soft"):
                        with torch.no_grad():
                            scores_ = previous_model(x_)
                        scores_ = scores_[:, :(classes_per_task*(task-1))] if scenario=="class" else scores_
                        #-> when scenario=="class", zero probabilities will be added in the [utils.loss_fn_kd]-function
                elif scenario=="task":
                    # Sample replayed training data, wrap in (cuda-)Variables and store in lists
                    x_ = list()
                    y_ = list()
                    up_to_task = task if replay_mode=="offline" else task-1
                    for task_id in range(up_to_task):
                        x_temp, y_temp = next(data_loader_previous[task_id])
                        x_.append(x_temp.to(device))
                        # -only keep [y_] if required (as otherwise unnecessary computations will be done)
                        if model.replay_targets=="hard":
                            y_temp = y_temp - (classes_per_task*task_id) #-> adjust y-targets to 'active range'
                            y_.append(y_temp.to(device))
                        else:
                            y_.append(None)
                    # If required, get target scores (i.e., [scores_]) -- using previous model
                    if (model.replay_targets=="soft") and (previous_model is not None):
                        scores_ = list()
                        for task_id in range(up_to_task):
                            with torch.no_grad():
                                scores_temp = previous_model(x_[task_id])
                            scores_temp = scores_temp[:, (classes_per_task*task_id):(classes_per_task*(task_id+1))]
                            scores_.append(scores_temp)

            ##-->> Generative / Current Replay <<--##
            if Generative or Current:
                # Get replayed data (i.e., [x_]) -- either current data or use previous generator
                x_ = x if Current else previous_generator.sample(batch_size)
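                # NOTE: with Current replay, the 'replayed' inputs are simply the
                # current batch; it acts as replay because the targets for these
                # inputs are taken from the previous model (below)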

                # Get target scores and labels (i.e., [scores_] / [y_]) -- using previous model, with no_grad()
                # -if there is no task-specific mask, obtain all predicted scores at once
                if (not hasattr(previous_model, "mask_dict")) or (previous_model.mask_dict is None):
                    with torch.no_grad():
                        all_scores_ = previous_model(x_)
                # -depending on chosen scenario, collect relevant predicted scores (per task, if required)
                if scenario in ("domain", "class") and (
                        (not hasattr(previous_model, "mask_dict")) or (previous_model.mask_dict is None)
                ):
                    scores_ = all_scores_[:,:(classes_per_task * (task - 1))] if scenario == "class" else all_scores_
                    _, y_ = torch.max(scores_, dim=1)
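                    # -> hard pseudo-labels: the previous model's most likely class
                    #    for each replayed input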
                else:
                    # NOTE: it's possible to have scenario=domain with task-mask (so actually it's the Task-IL scenario)
                    # -[x_] needs to be evaluated according to each previous task, so make list with entry per task
                    scores_ = list()
                    y_ = list()
                    for task_id in range(task - 1):
                        # -if there is a task-mask (i.e., XdG is used), obtain predicted scores for each task separately
                        if hasattr(previous_model, "mask_dict") and previous_model.mask_dict is not None:
                            previous_model.apply_XdGmask(task=task_id + 1)
                            with torch.no_grad():
                                all_scores_ = previous_model(x_)
                        if scenario=="domain":
                            temp_scores_ = all_scores_
                        else:
                            temp_scores_ = all_scores_[:,
                                           (classes_per_task * task_id):(classes_per_task * (task_id + 1))]
                        _, temp_y_ = torch.max(temp_scores_, dim=1)
                        scores_.append(temp_scores_)
                        y_.append(temp_y_)

                # Only keep predicted y/scores if required (as otherwise unnecessary computations will be done)
                y_ = y_ if (model.replay_targets == "hard") else None
                scores_ = scores_ if (model.replay_targets == "soft") else None


            #---> Train MAIN MODEL
            if batch_index <= iters:

                # Train the main model with this batch
                loss_dict = model.train_a_batch(x, y, x_=x_, y_=y_, scores=scores, scores_=scores_,
                                                active_classes=active_classes, task=task, rnt=1./task)

                # Update running parameter importance estimates in W
                if isinstance(model, ContinualLearner) and (model.si_c>0):
                    for n, p in model.named_parameters():
                        if p.requires_grad:
                            n = n.replace('.', '__')
                            if p.grad is not None:
                                W[n].add_(-p.grad*(p.detach()-p_old[n]))
                            p_old[n] = p.detach().clone()
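                # NOTE: [W] accumulates each parameter's running contribution to
                # the loss decrease along its update trajectory:
                #   W[n] += -grad * (theta_new - theta_old)
                # After finishing the task, update_omega() turns this path
                # integral into the SI importance weights (see below)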

                # Fire callbacks (for visualization of training-progress / evaluating performance after each task)
                for loss_cb in loss_cbs:
                    if loss_cb is not None:
                        loss_cb(progress, batch_index, loss_dict, task=task)
                for eval_cb in eval_cbs:
                    if eval_cb is not None:
                        eval_cb(model, batch_index, task=task)
                if model.label == "VAE":
                    for sample_cb in sample_cbs:
                        if sample_cb is not None:
                            sample_cb(model, batch_index, task=task)


            #---> Train GENERATOR
            if generator is not None and batch_index <= gen_iters:

                # Train the generator with this batch
                loss_dict = generator.train_a_batch(x, y, x_=x_, y_=y_, scores_=scores_, active_classes=active_classes,
                                                    task=task, rnt=1./task)

                # Fire callbacks on each iteration
                for loss_cb in gen_loss_cbs:
                    if loss_cb is not None:
                        loss_cb(progress_gen, batch_index, loss_dict, task=task)
                for sample_cb in sample_cbs:
                    if sample_cb is not None:
                        sample_cb(generator, batch_index, task=task)


        ##----------> UPON FINISHING EACH TASK...

        # Close progress-bar(s)
        progress.close()
        if generator is not None:
            progress_gen.close()

        ######
        if test_datasets:
            print("\n\n--> Evaluation ({}-incremental learning scenario: Before pruning):".format(scenario))
            # Evaluate precision of final model on full test-set
            precs = [evaluate.validate(
                model, test_datasets[i], verbose=False, test_size=None, task=i+1, with_exemplars=False,
                allowed_classes=list(range(classes_per_task*i, classes_per_task*(i+1))) if scenario=="task" else None
            ) for i in range(x_tasks)]
            print("\n Precision on test-set (softmax classification):")
            for i in range(x_tasks):
                print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
            average_precs = sum(precs) / x_tasks
            print('=> average precision over all {} tasks: {:.4f}'.format(x_tasks, average_precs))

            # -with exemplars
            if use_exemplars:
                precs = [evaluate.validate(
                    model, test_datasets[i], verbose=False, test_size=None, task=i+1, with_exemplars=True,
                    allowed_classes=list(range(classes_per_task*i, classes_per_task*(i+1))) if scenario=="task" else None
                ) for i in range(x_tasks)]
                print("\n Precision on test-set (classification using exemplars):")
                for i in range(x_tasks):
                    print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
                average_precs_ex = sum(precs) / x_tasks
                print('=> average precision over all {} tasks: {:.4f}'.format(x_tasks, average_precs_ex))
            print("\n")

        # -------------------------------------------------------------
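            # A global magnitude-pruning pass (a sketch of the approach used here):
            #  1) count all weights in the LinearExcitability layers,
            #  2) sort their absolute values and take the value at the
            #     [sparsity]-quantile as threshold,
            #  3) zero out every weight whose magnitude falls below it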
            total = 0
            for m in model.modules():
                if isinstance(m, LinearExcitability):
                    total += m.weight.data.numel()
            x_weights = torch.zeros(total)
            index = 0
            for m in model.modules():
                if isinstance(m, LinearExcitability):
                    size = m.weight.data.numel()
                    x_weights[index:(index+size)] = m.weight.data.view(-1).abs().clone()
                    index += size

            sorted_weights, _ = torch.sort(x_weights)
            thre_index = int(total * sparsity)
            thre = sorted_weights[thre_index]
            pruned = 0
            print('Pruning threshold: {}'.format(thre))
            zero_flag = False
            for k, m in enumerate(model.modules()):
                if isinstance(m, LinearExcitability):
                    weight_copy = m.weight.data.abs().clone()
                    mask = weight_copy.gt(thre).float()
                    pruned = pruned + mask.numel() - torch.sum(mask)
                    m.weight.data.mul_(mask)
                    if int(torch.sum(mask)) == 0:
                        zero_flag = True
                    print('layer index: {:d} \t total params: {:d} \t remaining params: {:d}'.
                        format(k, mask.numel(), int(torch.sum(mask))))
            print('Total params: {}, pruned params: {}, pruned ratio: {:.4f}'.format(total, int(pruned), float(pruned)/total))
        # -------------------------------------------------------------

            print("\n\n--> Evaluation ({}-incremental learning scenario: After pruning):".format(scenario))

            # Evaluate precision of final model on full test-set
            precs = [evaluate.validate(
                model, test_datasets[i], verbose=False, test_size=None, task=i+1, with_exemplars=False,
                allowed_classes=list(range(classes_per_task*i, classes_per_task*(i+1))) if scenario=="task" else None
            ) for i in range(x_tasks)]
            print("\n Precision on test-set (softmax classification):")
            for i in range(x_tasks):
                print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
            average_precs = sum(precs) / x_tasks
            print('=> average precision over all {} tasks: {:.4f}'.format(x_tasks, average_precs))

            # -with exemplars
            if use_exemplars:
                precs = [evaluate.validate(
                    model, test_datasets[i], verbose=False, test_size=None, task=i+1, with_exemplars=True,
                    allowed_classes=list(range(classes_per_task*i, classes_per_task*(i+1))) if scenario=="task" else None
                ) for i in range(x_tasks)]
                print("\n Precision on test-set (classification using exemplars):")
                for i in range(x_tasks):
                    print(" - Task {}: {:.4f}".format(i + 1, precs[i]))
                average_precs_ex = sum(precs) / x_tasks
                print('=> average precision over all {} tasks: {:.4f}'.format(x_tasks, average_precs_ex))
            print("\n")

        ######

        # EWC: estimate Fisher Information matrix (FIM) and update term for quadratic penalty
        if isinstance(model, ContinualLearner) and (model.ewc_lambda>0):
            # -find allowed classes
            allowed_classes = list(
                range(classes_per_task*(task-1), classes_per_task*task)
            ) if scenario=="task" else (list(range(classes_per_task*task)) if scenario=="class" else None)
            # -if needed, apply correct task-specific mask
            if model.mask_dict is not None:
                model.apply_XdGmask(task=task)
            # -estimate FI-matrix
            model.estimate_fisher(training_dataset, allowed_classes=allowed_classes)
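            # NOTE: the estimated FIM enters training on later tasks as a quadratic
            # penalty,
            #   L_ewc = (ewc_lambda / 2) * sum_i F_i * (theta_i - theta_i*)^2,
            # which anchors parameters that were important for previous tasks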

        # SI: calculate and update the normalized path integral
        if isinstance(model, ContinualLearner) and (model.si_c>0):
            model.update_omega(W, model.epsilon)

        # EXEMPLARS: update exemplar sets
        if (add_exemplars or use_exemplars) or replay_mode=="exemplars":
            exemplars_per_class = int(np.floor(model.memory_budget / (classes_per_task*task)))
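            # (e.g., with memory_budget=2000, 10 classes per task and task=4,
            #  each class keeps floor(2000/40) = 50 exemplars)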
            # reduce exemplar-sets to fit within the memory budget
            model.reduce_exemplar_sets(exemplars_per_class)
            # for each new class trained on, construct an exemplar-set
            new_classes = list(range(classes_per_task)) if scenario=="domain" else list(range(classes_per_task*(task-1),
                                                                                              classes_per_task*task))
            for class_id in new_classes:
                start = time.time()
                # create new dataset containing only all examples of this class
                class_dataset = SubDataset(original_dataset=train_dataset, sub_labels=[class_id])
                # based on this dataset, construct new exemplar-set for this class
                model.construct_exemplar_set(dataset=class_dataset, n=exemplars_per_class)
                print("Constructed exemplar-set for class {}: {} seconds".format(class_id, round(time.time()-start)))
            model.compute_means = True
            # evaluate this way of classifying on test set
            for eval_cb in eval_cbs_exemplars:
                if eval_cb is not None:
                    eval_cb(model, iters, task=task)

        # REPLAY: update source for replay
        previous_model = copy.deepcopy(model).eval()
        if replay_mode == 'generative':
            Generative = True
            previous_generator = copy.deepcopy(generator).eval() if generator is not None else previous_model
        elif replay_mode == 'current':
            Current = True
        elif replay_mode in ('exemplars', 'exact'):
            Exact = True
            if replay_mode == "exact":
                previous_datasets = train_datasets[:task]
            else:
                if scenario == "task":
                    previous_datasets = []
                    for task_id in range(task):
                        previous_datasets.append(
                            ExemplarDataset(
                                model.exemplar_sets[
                                (classes_per_task * task_id):(classes_per_task * (task_id + 1))],
                                target_transform=lambda y, x=classes_per_task * task_id: y + x)
                        )
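                        # NOTE: binding [x=...] as a default argument freezes its
                        # value in each iteration; closing over [task_id] directly
                        # would make every lambda use the final loop value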
                else:
                    target_transform = (lambda y, x=classes_per_task: y % x) if scenario == "domain" else None
                    previous_datasets = [
                        ExemplarDataset(model.exemplar_sets, target_transform=target_transform)]
Example #11
    def test_valid(self):
        fileName = self.param
        print(fileName)
        self.assertEqual(validate(fileName), 0)
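    # NOTE: [self.param] is assumed to be injected by a parameterized test
    # runner; hypothetical usage (not part of the original snippet):
    #   suite.addTest(ParametrizedTestCase.parametrize(MyTest, param="input.xml"))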
Example #12
    prefix = f'baseline_{currentTime}' if args.baseline else f'{args.alpha}_{args.beta}_{args.eta}_{currentTime}'
    if args.baseline:
        criterion = nn.CrossEntropyLoss().cuda()
    else:
        criterion = MixedEntropy(alpha=args.alpha, beta=args.beta).cuda()

    # training
    if args.eval:
        if args.resume:
            dicts = torch.load(args.resume)
            model_dict = dicts['state_dict']
            model.load_state_dict(model_dict)
            model.cuda()
        else:
            raise RuntimeError('Please specify the path to the model')
        acc = validate(test_loader, model, args)
        print(f'the accuracy is: {acc}')

    else:
        model.train()
        for epoch in range(120):
            print(f'training epoch {epoch}!')
            total_train_loss = train(train_loader, model, criterion, optimizer,
                                     scheduler, epoch, args)
            total_train_loss /= len(train_dataset)
            writer.add_scalar('Train/Loss', total_train_loss, epoch)
            if epoch in [20, 40, 60, 80, 100]:
                state_dict = {
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }
                # NOTE: the original snippet is truncated here; saving under the
                # run [prefix] is an assumption about the missing code
                torch.save(state_dict, f'{prefix}_epoch{epoch}.pth')
Example #13
    optimizer = get_optimizer(opt, model)
    criterion = nn.CrossEntropyLoss()
    best_accuracy = 0
    iteration = 0  # avoid shadowing the built-in iter()

    for epoch in range(opt.epoch):
        # train for one epoch
        iteration, loss = train(train_loader,
                                model,
                                criterion,
                                optimizer,
                                epoch,
                                iter=iteration)
        # evaluate
        loss, accuracy = validate(val_loader, model, criterion, epoch)

        # higher accuracy is better: track and keep the best model
        is_best = accuracy > best_accuracy
        best_accuracy = max(accuracy, best_accuracy)

        # If best_eval, best_save_path
        # Save latest/best weights in the model directory
        save_checkpoint(
            {
                "state_dict": model,
                "epoch": epoch + 1,
                "accuracy": accuracy,
                "optimizer": optimizer.state_dict(),
            }, is_best, opt.SAVE_DIR, 'checkpoint.pth')

        print('accuracy: {:.2f}%'.format(100 * accuracy))
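
# A plausible definition of the [save_checkpoint] helper used above -- an
# assumption, since its implementation is not part of this snippet:
import os
import shutil

import torch

def save_checkpoint(state, is_best, save_dir, filename):
    # Save the latest checkpoint; copy it to 'model_best.pth' when it is the best so far
    path = os.path.join(save_dir, filename)
    torch.save(state, path)
    if is_best:
        shutil.copyfile(path, os.path.join(save_dir, 'model_best.pth'))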