Example #1
def run():
    """ Run the script"""
    #############
    ### Setup ###
    #############

    config = train_args_seq_smnist.parse_cmd_arguments()
    device, writer, logger = sutils.setup_environment(config)
    dhandlers = ctu._generate_tasks(config, logger)

    # We will use the namespace below to share miscellaneous information between
    # functions.
    shared = Namespace()
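    # The input feature size is read from the first task's data handler.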
    shared.feature_size = dhandlers[0].in_shape[0]

    # Plot images.
    if config.show_plots:
        figure_dir = os.path.join(config.out_dir, 'figures')
        if not os.path.exists(figure_dir):
            os.makedirs(figure_dir)

        for t, dh in enumerate(dhandlers):
            dh.plot_samples('Test Samples - Task %d' % t,
                dh.get_train_inputs()[:8], outputs=dh.get_train_outputs()[:8],
                show=True, filename=os.path.join(figure_dir,
                    'test_samples_task_%d.png' % t))

    target_net, hnet, dnet = stu.generate_networks(config, shared, dhandlers,
                                                   device)

    # Generate binary masks, if needed.
    ctx_masks = None
    if config.use_masks:
        ctx_masks = stu.generate_binary_masks(config, device, target_net)

    # We store the target network weights (excluding potential context-mod
    # weights) after every task. In this way, we can quantify changes and
    # observe the "stiffness" of EWC.
    shared.tnet_weights = []
    # We store the context-mod weights (or all weights) coming from the hypernet
    # after every task, in order to quantify "forgetting". Note, the hnet
    # regularizer should keep them fixed.
    shared.hnet_out = []
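    # For illustration (a sketch; the helper below is hypothetical and assumes
    # the stored entries are lists of PyTorch tensors): the change between two
    # stored weight sets could later be quantified via a Euclidean distance,
    #
    #     def param_dist(ws_a, ws_b):
    #         return torch.sqrt(sum(((a - b) ** 2).sum()
    #                               for a, b in zip(ws_a, ws_b)))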

    # Get the task-specific functions for loss and accuracy.
    task_loss_func = ctu.get_loss_func(config, device, logger, ewc_loss=False)
    accuracy_func = ctu.get_accuracy_func(config)
    ewc_loss_func = ctu.get_loss_func(config, device, logger, ewc_loss=True) \
        if config.use_ewc else None

    replay_fcts = None
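    # When replay is used, collect the replay-specific objectives: the VAE
    # reconstruction loss, the distillation loss and a soft-target accuracy
    # measure.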
    if config.use_replay:
        replay_fcts = dict()
        replay_fcts['rec_loss'] = ctu.get_vae_rec_loss_func()
        replay_fcts['distill_loss'] = ctu.get_distill_loss_func()
        replay_fcts['soft_trgt_acc'] = ctu.get_soft_trgt_acc_func()

    if config.multitask:
        summary_keywords = hpsearch_mt._SUMMARY_KEYWORDS
        summary_filename = hpsearch_mt._SUMMARY_FILENAME
    else:
        summary_keywords = hpsearch_cl._SUMMARY_KEYWORDS
        summary_filename = hpsearch_cl._SUMMARY_FILENAME

    ########################
    ### Train classifier ###
    ########################

    # Train the network task by task. Testing on all tasks is run after 
    # finishing training on each task.
    ret, train_loss, test_loss, test_acc = sts.train_tasks(dhandlers,
        target_net, hnet, dnet, device, config, shared, logger, writer,
        ctx_masks, summary_keywords, summary_filename,
        task_loss_func=task_loss_func, accuracy_func=accuracy_func,
        ewc_loss_func=ewc_loss_func, replay_fcts=replay_fcts)

    stu.log_results(test_acc, config, logger)

    writer.close()

    if ret == -1:
        logger.info('Program finished successfully.')

        if config.show_plots:
            plt.show()
    else:
        logger.error('Only %d tasks have completed training.' % (ret+1))
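
# Assumed entry point (a sketch, in case this script is invoked directly):
if __name__ == '__main__':
    run()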
Example #2
def analyse_single_run(out_dir, device, writer, logger, analysis_kwd,
        get_loss_func, accuracy_func, generate_tasks_func, n_samples=-1,
        redo_analyses=False, do_kernel_pca=False, do_supervised_dimred=False,
        timesteps_for_analysis=None, copy_task=True, num_tasks=-1,
        sup_dimred_criterion=None, sup_dimred_args={}):
    """Analyse the hidden dimensionality for an individual run.

    Args:
        out_dir (str): The path to the output directory.
        device: The device.
        writer: The tensorboard writer.
        logger: The logger.
        analysis_kwd (dict): The dictionary containing important keywords for
            the current analysis.
        get_loss_func (func): A handler to generate the loss function.
        accuracy_func (func): A handler to the accuracy function.
        generate_tasks_func (func): A handler to a datahandler generator.
        n_samples (int, optional): The number of samples to be used.
        redo_analyses (bool, optional): If ``True``, analyses will be redone
            even if they had been stored previously.
        do_kernel_pca (bool, optional): If ``True``, kernel PCA will also be
            used to compute the number of hidden dimensions.
        do_supervised_dimred (bool, optional): If ``True``, supervised linear
            dimensionality reduction will be used to compute the number of
            task-relevant hidden dimensions.
        timesteps_for_analysis (str, optional): The timesteps to be used for the
            PCA analyses.
        copy_task (bool, optional): Indicates whether we are analysing the
            Copy Task or not.
        num_tasks (int, optional): The number of tasks to be considered.
        sup_dimred_criterion (int, optional): If provided, this value will 
            be used as stopping criterion when looking for the number of 
            necessary supervised components to describe the hidden activity.
        sup_dimred_args (dict): Optional arguments (e.g., optimization
            arguments) passed to the supervised dimensionality reduction
            :func:`sequential.ht_analyses.supervised_dimred_utils.\
get_loss_vs_supervised_n_dim`.

    Returns:
        (tuple): Tuple containing:

        - **results**: The dictionary of results for the current run.
        - **settings**: The dictionary with the values of the parameters that
          are specified in `analysis_kwd['fixed_params']`.

    """

    ### Prepare the data and the networks.
    # Load the config
    if not os.path.exists(out_dir):
        raise ValueError('The directory "%s" does not exist.'%out_dir)
    with open(os.path.join(out_dir, "config.pickle"), "rb") as f:
        config = pickle.load(f)
    # Overwrite the directory if it's not the same as the original.
    if config.out_dir != out_dir:
        config.out_dir = out_dir
    # Check for old command line arguments and make compatible with new version.
    config = train_args_sequential.update_cli_args(config)

    print('Working on output directory "%s".' % out_dir)

    # Overwrite the number of tasks.
    if num_tasks == -1:
        num_tasks = config.num_tasks

    if sup_dimred_criterion == -1:
        sup_dimred_criterion = None

    stop_bit = None
    if copy_task:
        # Get the index of the stop bit.
        #stop_bit = getattr(config, analysis_kwd['complexity_measure'])
        # If we do not enforce the condition below, we have to determine the
        # location of the stop bit on a sample-by-sample basis.
        assert config.input_len_step == 0 and config.input_len_variability == 0
        stop_bit = config.first_task_input_len
        if config.pad_after_stop:
            stop_bit = config.pat_len

    ### Sanity checks.
    # Do some sanity checks on the parameters.
    assert config.use_ewc or config.use_si
    if config.use_ewc:
        method = 'ewc'
    elif config.use_si:
        method = 'si'
    for key, value in analysis_kwd['forced_params']:
        assert getattr(config, key) == value
    # Ensure all runs have comparable properties
    if 'num_tasks' not in analysis_kwd['fixed_params']:
        analysis_kwd['fixed_params'].append('num_tasks')

    ### Create the settings dictionary.
    settings = {}
    for key in analysis_kwd['fixed_params']:
        settings[key] = getattr(config, key)
        if key == 'num_tasks':
            settings[key] = num_tasks

    ### Load or create the results dictionary.
    if os.path.exists(os.path.join(out_dir, "pca_results.pickle")) and \
            not redo_analyses:
        ### Load existing results.
        with open(os.path.join(out_dir, "pca_results.pickle"), "rb") as f:
            results = pickle.load(f)
        print('PCA analyses were stored previously and have been reloaded.')
        assert num_tasks == -1 or results['num_tasks'] == num_tasks

        if 'mean_fisher' in results:
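            # Backwards compatibility: older result files stored the
            # importance values under 'mean_fisher'.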
            results['mean_importance'] = results['mean_fisher']
            results['mean_importance_ho'] = results['mean_fisher_ho']
    else:
        ### Prepare the environment.
        # Define functions.
        task_loss_func = get_loss_func(config, device, logger)
        # Generate datahandlers
        dhandlers = generate_tasks_func(config, logger, writer=writer)
        config.show_plots = True
        plc.visualise_data(dhandlers, config, device)
        # Generate the networks
        shared = argparse.Namespace()
        # FIXME might not work for all datasets (e.g., PoS tagging).
        shared.feature_size = dhandlers[0].in_shape[0]
        target_net, hnet, _ = stu.generate_networks(config, shared, dhandlers,
                                                    device)

        ### Initialize the results dictionary.
        results = {}
        if copy_task:
            results['masked'] = config.pat_len
            results['pad_after_stop'] = config.pad_after_stop
            results['accs_per_ts'] = []
            results['permutation'] = []
        results['expl_var_per_ts'] = []
        results['kexpl_var_per_ts'] = []
        results['expl_var_per_ts_yt'] = []
        results['kexpl_var_per_ts_yt'] = []
        results['complexity_measure'] = getattr(config, \
            analysis_kwd['complexity_measure'])
        results['complexity_measure_name'] = \
            analysis_kwd['complexity_measure_name']
        results['num_tasks'] = num_tasks
        results['final_acc'] = []
        results['final_loss'] = []
        results['mean_importance'] = []
        results['mean_importance_ho'] = []
        results['expl_var'] = []
        results['kexpl_var'] = []
        results['expl_var_yt'] = []
        results['kexpl_var_yt'] = []
        if do_supervised_dimred:
            # Note, in the code 'loss_n_dim_supervised' plays, for the
            # supervised dimensionality reduction, the same role that
            # 'expl_var' plays for the standard PCA analysis, i.e., we store
            # the explained variance (resp. loss) as a function of how many
            # dimensions are taken into account, and then select a threshold
            # on the explained variance (resp. loss) to determine the number
            # of intrinsic dimensions (see the sketch below).
            results['loss_n_dim_supervised'] = []
            results['accu_n_dim_supervised'] = []
            if copy_task:
                results['accu_n_dim_sup_at_stop'] = []
                results['loss_n_dim_sup_at_stop'] = []
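            # For instance (a sketch; the threshold value is illustrative and
            # assumes 'expl_var[k]' stores the variance explained by the first
            # k+1 components), the intrinsic dimensionality could be read off
            # as:
            #
            #     n_dims = np.argmax(np.array(expl_var) >= 0.75) + 1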

        # Iterate over all tasks and accumulate results in lists within the
        # results dictionary values.
        all_during_act = []
        all_during_act_yt = []
        for task_id in range(num_tasks):

            if copy_task:
                results['permutation'].append(dhandlers[task_id].permutation)

            ### Load the checkpointed during model for the corresponding task.
            # Note, the return values of the function below are just references
            # to the variables `target_net` and `hnet`, which are modified in-
            # place.
            mnet, hnet = load_models(out_dir, device, logger, target_net, hnet,
                wembs=None, task_id=task_id, method=method)
            # FIXME Should we disentangle weight matrices and bias vectors?
            hh_imp_values = get_importance_values(mnet, connection_type='hh',
                method=method)
            results['mean_importance'].append(np.mean(hh_imp_values))
            ho_imp_values = get_importance_values(mnet, connection_type='ho',
                method=method)
            if ho_imp_values != []:
                results['mean_importance_ho'].append(np.mean(ho_imp_values))
            else:
                results['mean_importance_ho'].append(np.nan)

            ### Obtain hidden activations and performances.
            # We only measure the final accuracy up to the current task, since
            # we are simulating a continual learning setting with fewer tasks.
            loss, accs, accs_per_ts = test(dhandlers, device, config, None,
                logger, writer, mnet, hnet, store_activations=True, \
                accuracy_func=accuracy_func, task_loss_func=task_loss_func,
                num_trained=task_id, return_acc_per_ts=True)
            results['final_loss'].append(np.mean(loss[:task_id+1]))
            if accs is None:
                results['final_acc'].append(None)
            else:
                results['final_acc'].append(np.mean(accs[:task_id+1]))
            if copy_task:
                results['accs_per_ts'].append(accs_per_ts[task_id])

            ### Load the internal and output activations.
            tasks_act, act = get_activations(out_dir, task_id=task_id,
                vanilla_rnn=config.use_vanilla_rnn)
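            # Sanity check: the last activation dimension should equal the
            # total number of recurrent units.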
            n_hidden = np.sum(misc.str_to_ints(config.rnn_arch))
            assert act.shape[-1] == n_hidden
            all_during_act.append(act)
            tasks_act_yt, act_yt = get_activations(out_dir, task_id=task_id,
                internal=False, vanilla_rnn=config.use_vanilla_rnn)
            all_during_act_yt.append(act_yt)

            ### Do PCA analyses.
            # Do analyses on internal recurrent activations.
            results = pca_analysis_single_task(act, results,
                do_kernel_pca=do_kernel_pca, n_samples=n_samples,
                timesteps=timesteps_for_analysis, stop_bit=stop_bit,
                do_supervised_dimred=do_supervised_dimred)

            # Do analyses on output recurrent activations.
            results = pca_analysis_single_task(act_yt, results,
                do_kernel_pca=do_kernel_pca, n_samples=n_samples,
                timesteps=timesteps_for_analysis, stop_bit=stop_bit,
                internal=False, do_supervised_dimred=do_supervised_dimred)

            if do_supervised_dimred:
                if not copy_task:
                    raise NotImplementedError('TODO need to adapt the ' +
                        'loss computation for tasks other than the Copy Task.')
                # Do supervised dimensionality reduction on during models.
                loss_dim, accu_dim = get_loss_vs_supervised_n_dim(mnet,
                        hnet, task_loss_func, accuracy_func, dhandlers, config,
                        device, task_id=task_id, criterion=sup_dimred_criterion,
                        writer_dir=out_dir, **sup_dimred_args)
                results['loss_n_dim_supervised'].append(loss_dim)
                results['accu_n_dim_supervised'].append(accu_dim)
                if copy_task:
                    loss_dim, accu_dim = get_loss_vs_supervised_n_dim(mnet,
                            hnet, task_loss_func, accuracy_func, dhandlers,
                            config, device, stop_timestep=stop_bit,
                            task_id=task_id, criterion=sup_dimred_criterion,
                            writer_dir=out_dir, **sup_dimred_args)
                    results['loss_n_dim_sup_at_stop'].append(loss_dim)
                    results['accu_n_dim_sup_at_stop'].append(accu_dim)

        ### Get hidden dimensionality using the final model.
        # Note, we delete the files "int_activations.pickle" and
        # "activations.pickle" that were generated when testing the model of
        # the last task above; they are regenerated below by testing the final
        # model.
        os.remove(os.path.join(out_dir, 'int_activations.pickle'))
        os.remove(os.path.join(out_dir, 'activations.pickle'))
        mnet, hnet = load_models(out_dir, device, logger, target_net, hnet,
                                 wembs=None, method=method)
        _ = test(dhandlers, device, config, shared, logger, writer, mnet,
            hnet, store_activations=True,
            accuracy_func=accuracy_func, task_loss_func=task_loss_func,
            num_trained=task_id, return_acc_per_ts=True)

        # Load internal activations.
        tasks_act, act = get_activations(out_dir, task_id=task_id,
            vanilla_rnn=config.use_vanilla_rnn)
        tasks_act_yt, act_yt = get_activations(out_dir, task_id=task_id,
            internal=False, vanilla_rnn=config.use_vanilla_rnn)

        ### Do PCA analyses on final models.
        results = pca_analysis_all_tasks(act, all_during_act, results,
                do_kernel_pca=do_kernel_pca, n_samples=n_samples,
                timesteps=timesteps_for_analysis, stop_bit=stop_bit,
                copy_task=copy_task)
        results = pca_analysis_all_tasks(act_yt, all_during_act_yt, results,
                do_kernel_pca=do_kernel_pca, n_samples=n_samples,
                timesteps=timesteps_for_analysis, stop_bit=stop_bit,
                copy_task=copy_task, internal=False)

        if do_supervised_dimred and len(all_during_act) > 1:
            ### Do supervised dimensionality reduction on final models.
            # Only do if we dealt with more than one task.
            loss_dim, accu_dim = get_loss_vs_supervised_n_dim(mnet, hnet,
                    task_loss_func, accuracy_func, dhandlers, config, device,
                    criterion=sup_dimred_criterion, writer_dir=out_dir,
                    **sup_dimred_args)
            results['loss_n_dim_supervised_all_tasks'] = loss_dim
            results['accu_n_dim_supervised_all_tasks'] = accu_dim

        # Store pickle results.
        with open(os.path.join(out_dir, 'pca_results.pickle'), 'wb') as handle:
            pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return results, settings
Example #3
def run():
    """ Run the script"""
    #############
    ### Setup ###
    #############

    config = train_args_copy.parse_cmd_arguments()
    device, writer, logger = sutils.setup_environment(config)
    dhandlers = ctu.generate_copy_tasks(config, logger, writer=writer)
    plc.visualise_data(dhandlers, config, device)

    # We will use the namespace below to share miscellaneous information between
    # functions.
    shared = Namespace()
    shared.feature_size = dhandlers[0].in_shape[0]

    if (config.permute_time or config.permute_width) and not \
            config.scatter_pattern and not config.permute_xor_separate and \
            not config.permute_xor_iter > 1:
        chance = ctu.compute_chance_level(dhandlers, config)
        logger.info('Chance level for perfect during accuracies: %.2f' %
                    chance)

    # A bit ugly, find a nicer way (problem is, if this were overwritten before
    # generating the tasks, the task with the shortest sequences would always
    # be chosen).
    if config.last_task_only:
        config.num_tasks = 1

    target_net, hnet, dnet = stu.generate_networks(config, shared, dhandlers,
                                                   device)

    # Generate binary masks, if needed.
    ctx_masks = None
    if config.use_masks:
        ctx_masks = stu.generate_binary_masks(config, device, target_net)

    # We store the target network weights (excluding potential context-mod
    # weights) after every task. In this way, we can quantify changes and
    # observe the "stiffness" of EWC.
    shared.tnet_weights = []
    # We store the context-mod weights (or all weights) coming from the hypernet
    # after every task, in order to quantify "forgetting". Note, the hnet
    # regularizer should keep them fixed.
    shared.hnet_out = []

    # Get the task-specific functions for loss and accuracy.
    task_loss_func = ctu.get_copy_loss_func(config,
                                            device,
                                            logger,
                                            ewc_loss=False)
    accuracy_func = ctu.get_accuracy
    ewc_loss_func = ctu.get_copy_loss_func(config, device, logger, \
        ewc_loss=True) if config.use_ewc else None

    replay_fcts = None
    if config.use_replay:
        replay_fcts = dict()
        replay_fcts['rec_loss'] = ctu.get_vae_rec_loss_func()
        replay_fcts['distill_loss'] = ctu.get_distill_loss_func()
        replay_fcts['soft_trgt_acc'] = ctu.get_soft_trgt_acc_func()

    if config.multitask:
        summary_keywords = hpsearch_mt._SUMMARY_KEYWORDS
        summary_filename = hpsearch_mt._SUMMARY_FILENAME
    else:
        summary_keywords = hpsearch_cl._SUMMARY_KEYWORDS
        summary_filename = hpsearch_cl._SUMMARY_FILENAME

    ########################
    ### Train classifier ###
    ########################

    # Train the network task by task. Testing on all tasks is run after
    # finishing training on each task.
    ret, train_loss, test_loss, test_acc = sts.train_tasks(
        dhandlers,
        target_net,
        hnet,
        dnet,
        device,
        config,
        shared,
        logger,
        writer,
        ctx_masks,
        summary_keywords,
        summary_filename,
        task_loss_func=task_loss_func,
        accuracy_func=accuracy_func,
        ewc_loss_func=ewc_loss_func,
        replay_fcts=replay_fcts)

    stu.log_results(test_acc, config, logger)

    writer.close()

    if ret == -1:
        logger.info('Program finished successfully.')

        if config.show_plots:
            plt.show()
    else:
        logger.error('Only %d tasks have completed training.' % (ret + 1))
Example #4
    # FIXME Code below copied from script `state_space_analysis`.
    # Load the config
    if not os.path.exists(out_dir):
        raise ValueError('The directory "%s" does not exist.' % out_dir)
    with open(os.path.join(out_dir, "config.pickle"), "rb") as f:
        config = pickle.load(f)
    # Overwrite the directory if it's not the same as the original.
    if config.out_dir != out_dir:
        config.out_dir = out_dir
    # Check for old command line arguments and make compatible with new version.
    config = sta.update_cli_args(config)

    # FIXME only for copy task!
    generate_tasks_func = copytu.generate_copy_tasks
    dhandlers = generate_tasks_func(config, logger, writer=writer)
    mnet, hnet, _ = stu.generate_networks(config, dhandlers, device)
    ssa.load_models(out_dir,
                    device,
                    logger,
                    mnet,
                    hnet=hnet,
                    task_id=config.num_tasks - 1)
    writer.close()

    # FIXME no hnet support yet.
    assert len(mnet.param_shapes) == len(mnet.internal_params)
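    # I.e., all main-network weights are assumed to be maintained internally
    # (none are generated by a hypernetwork).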

    V_per_task = []

    i_start = 0
    for tid in range(config.num_tasks):
Example #5
def run():
    """Run the script"""
    #############
    ### Setup ###
    #############

    config = train_args_pos.parse_cmd_arguments()
    device, writer, logger = sutils.setup_environment(config)
    dhandlers = ctu.generate_tasks(config, logger, writer=writer)

    # Load preprocessed word embeddings, see
    # :mod:`data.timeseries.preprocess_mud` for details.
    wembs_path = '../../datasets/sequential/mud/embeddings.pickle'
    wemb_lookups = eu.generate_emb_lookups(config,
                                           filename=wembs_path,
                                           device=device)
    assert len(wemb_lookups) == config.num_tasks

    # We will use the namespace below to share miscellaneous information between
    # functions.
    shared = Namespace()
    # The embedding size is fixed due to the use of pretrained polyglot
    # embeddings.
    # FIXME Could be made configurable in the future in case we don't initialize
    # embeddings via polyglot.
    shared.feature_size = 64
    shared.word_emb_lookups = wemb_lookups

    target_net, hnet, dnet = stu.generate_networks(config, shared, dhandlers,
                                                   device)

    # Generate binary masks, if needed.
    ctx_masks = None
    if config.use_masks:
        ctx_masks = stu.generate_binary_masks(config, device, target_net)

    # We store the target network weights (excluding potential context-mod
    # weights) after every task. In this way, we can quantify changes and
    # observe the "stiffness" of EWC.
    shared.tnet_weights = []
    # We store the context-mod weights (or all weights) coming from the hypernet
    # after every task, in order to quantify "forgetting". Note, the hnet
    # regularizer should keep them fixed.
    shared.hnet_out = []

    # Get the task-specific functions for loss and accuracy.
    task_loss_func = ctu.get_loss_func(config, device, logger, ewc_loss=False)
    accuracy_func = ctu.get_accuracy_func(config)
    ewc_loss_func = ctu.get_loss_func(config, device, logger, \
        ewc_loss=True) if config.use_ewc else None

    replay_fcts = None
    if config.use_replay:
        replay_fcts = dict()
        replay_fcts['rec_loss'] = ctu.get_vae_rec_loss_func()
        replay_fcts['distill_loss'] = ctu.get_distill_loss_func()
        replay_fcts['soft_trgt_acc'] = ctu.get_soft_trgt_acc_func()

    if config.multitask:
        summary_keywords = hpsearch_mt._SUMMARY_KEYWORDS
        summary_filename = hpsearch_mt._SUMMARY_FILENAME
    else:
        summary_keywords = hpsearch_cl._SUMMARY_KEYWORDS
        summary_filename = hpsearch_cl._SUMMARY_FILENAME

    ########################
    ### Train classifier ###
    ########################

    shared.f_scores = None
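    # Placeholder for F-scores (presumably filled during evaluation).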

    # Train the network task by task. Testing on all tasks is run after
    # finishing training on each task.
    ret, train_loss, test_loss, test_acc = sts.train_tasks(
        dhandlers,
        target_net,
        hnet,
        dnet,
        device,
        config,
        shared,
        logger,
        writer,
        ctx_masks,
        summary_keywords,
        summary_filename,
        task_loss_func=task_loss_func,
        accuracy_func=accuracy_func,
        ewc_loss_func=ewc_loss_func,
        replay_fcts=replay_fcts)

    stu.log_results(test_acc, config, logger)

    writer.close()

    if ret == -1:
        logger.info('Program finished successfully.')

        if config.show_plots:
            plt.show()
    else:
        logger.error('Only %d tasks have completed training.' % (ret + 1))