def __init__(self, config: ModelConfig):
        """
        Build the model from a config object holding all the necessary parameters (activation, normalization, loss,
        initializer, optimizer, scheduler, ...).
        :param config: an object of the ModelConfig class.
        :raises ValueError: if no parameter has been defined once `_build_model()` and `initialize_params()` have run.
        """
        super(BaseModel, self).__init__()
        self._check_config(config)
        self._name = config.name

        # components of the model, then the architecture itself
        self._set_activation(config.activation)
        self._set_normalization(config.normalization)
        self._set_loss(config.loss)
        self._build_model(config)

        # initialize the parameters using the config
        self.initialize_params(config.initializer)

        # the optimizer can only be set if some parameters have already been defined
        has_parameters = any(True for _ in self.parameters())
        if not has_parameters:
            raise ValueError(
                "Model has no parameters defined and optimizer cannot be defined: len(self.parameters) = "
                "0. Parameters have to be defined in the _build_model() method."
            )
        self._set_optimizer(config.optimizer)
        self._set_scheduler(config.scheduler)

        # keep the hyper-parameters for later logging
        self.hparams = config.dict()
        self.save_hyperparameters(config.dict())

        # history of the train/val/test metrics, kept for later plotting / analysing
        self.results = dict(training=[], validation=[], test=[])
    def setUp(self) -> None:
        """
        Read the yaml config, build a ModelConfig using a 'warmup_switch' scheduler and pre-load the training batches.
        """
        config_dict = read_yaml(CONFIG_PATH)
        architecture = config_dict['architecture']
        optimizer_params = config_dict['optimizer']['params']

        # values read from the yaml config
        self.input_size = architecture['input_size']
        self.base_lr = optimizer_params['lr']
        self.width = architecture['width']

        # values fixed by this test case
        self.n_warmup_steps = 1
        self.batch_size = 128
        self.L = 3

        # overwrite the config with the values used by the test
        architecture['n_layers'] = self.L + 1
        optimizer_params['lr'] = self.base_lr
        scheduler_params = {
            'n_warmup_steps': self.n_warmup_steps,
            'calibrate_base_lr': True
        }
        config_dict['scheduler'] = {
            'name': 'warmup_switch',
            'params': scheduler_params
        }
        self.base_model_config = ModelConfig(config_dict)

        # training data, with all the batches materialized into a list
        self.training_dataset, _ = load_data(download=False, flatten=True)
        self.train_data_loader = DataLoader(
            self.training_dataset, shuffle=True, batch_size=self.batch_size)
        self.batches = list(self.train_data_loader)
 def test_resnet_config_from_file(self):
     """
     Check every section of the ModelConfig loaded from the file at `self.reset_config_path`.

     Equality checks use `assertEqual` so that a failure reports both the actual and the expected value.
     """
     config = ModelConfig(config_file=self.reset_config_path)
     self.assertEqual(config.name, "ResNet")
     # architecture
     self.assertDictEqual(
         {
             "input_size": 28,
             "n_blocks": 2,
             "n_layers": 2,
             "kernel_size": 3,
             "stride": 1,
             "n_channels": 32,
             "bias_conv": True,
             "fc_dim": 256,
             "bias_fc": True,
             "output_size": 10
         }, config.architecture)
     # activation
     self.assertEqual(config.activation.name, "relu")
     self.assertFalse(hasattr(config.activation, "params"))
     # loss
     self.assertEqual(config.loss.name, "cross_entropy")
     self.assertDictEqual(config.loss.params, {"reduction": "mean"})
     # opt
     self.assertEqual(config.optimizer.name, "adam")
     self.assertDictEqual(config.optimizer.params, {
         "lr": 1.0e-4,
         "beta1": 0.9,
         "beta2": 0.999
     })
     # norm
     self.assertEqual(config.normalization.name, "batch_norm_2d")
     self.assertFalse(hasattr(config.normalization, "params"))
Beispiel #4
0
    def setUp(self) -> None:
        """
        Build the base ModelConfig from the yaml file and instantiate FCNTK, FCIP and FCmuP with a width of 0.
        """
        self.base_model_config = ModelConfig(read_yaml(CONFIG_PATH))
        self.width = 0

        # the three objects share the same config and width
        shared_config = self.base_model_config
        self.ntk = ntk.FCNTK(shared_config, self.width)
        self.ip = ip.FCIP(shared_config, c=0, width=self.width)
        self.muP = muP.FCmuP(shared_config, self.width)
Beispiel #5
0
    def _run_trial(self, idx, seed, k, r, batch_size, n, n_train, d, m):
        """
        Run a single trial: fit, validate and test a TwoLayerNet, then pickle its results in the trial directory.

        The trial is skipped entirely when its directory already exists.
        :param idx: index of the trial; the trial is named 'trial_{idx + 1}'.
        :param seed: random seed set right before the model is created.
        :param k, r, batch_size, n, n_train, d, m: experiment values, only forwarded to `_log_experiment_info`.
        """
        trial_name = 'trial_{}'.format(idx + 1)
        self.trial_dir = os.path.join(self.exp_dir, trial_name)

        # run the trial only if it doesn't already exist
        if os.path.exists(self.trial_dir):
            return

        create_dir(self.trial_dir)  # directory to save the trial
        # version for TensorBoard
        self.trial_version = '{}_{}'.format(self.version, trial_name)
        # tb logger, checkpoints and early stopping
        self._set_tb_logger_and_callbacks(trial_name)

        # logs of the trial are saved inside the trial directory
        logger = set_up_logger(os.path.join(self.trial_dir, self.LOG_NAME))
        # NOTE(review): the number logged here is `idx` whereas the trial name uses `idx + 1` - confirm intent
        header = '----- Trial {:,} with version {} -----\n'.format(
            idx, self.trial_version)
        logger.info(header)
        self._log_experiment_info(k, r, batch_size, n, n_train, d, m)

        # random seed for the trial
        set_random_seeds(seed)
        logger.info(
            'Random seed used for the script : {:,}'.format(self.SEED))
        logger.info('Random seed used for the trial : {:,}\n'.format(seed))

        # the config is wrapped in a class to be passed to the model
        config = ModelConfig(config_dict=self.config_dict)
        two_layer_net = TwoLayerNet(config, train_hidden=True)
        n_params = two_layer_net.count_parameters()
        logger.info('Number of model parameters : {:,}'.format(n_params))
        logger.info('Model architecture :\n{}\n'.format(two_layer_net))

        # training and validation pipeline
        trainer_kwargs = {
            'max_epochs': MAX_EPOCHS,
            'max_steps': MAX_STEPS,
            'logger': self.tb_logger,
            'checkpoint_callback': self.checkpoint_callback,
            'num_sanity_val_steps': 0,
            'early_stop_callback': self.early_stopping_callback,
        }
        trainer = pl.Trainer(**trainer_kwargs)
        trainer.fit(model=two_layer_net,
                    train_dataloader=self.train_data_loader,
                    val_dataloaders=self.val_data_loader)

        # test pipeline
        test_results = trainer.test(model=two_layer_net,
                                    test_dataloaders=self.test_data_loader)
        logger.info('Test results :\n{}\n'.format(test_results))

        # save all training, val and test results to a pickle file
        results_path = os.path.join(self.trial_dir, self.RESULTS_FILE)
        with open(results_path, 'wb') as file:
            pickle.dump(two_layer_net.results, file)
 def test_named_config_from_dict(self):
     """
     Check that a config built from a dict holding only a name keeps that name and leaves every other section unset.

     Dedicated assertions (`assertEqual`, `assertIsNone`) are used so that a failure reports the offending value.
     """
     config = ModelConfig(config_dict={"name": "Config"})
     self.assertEqual(config.name, "Config")
     # activation
     self.assertIsNone(config.activation.name)
     self.assertFalse(hasattr(config.activation, "params"))
     # loss
     self.assertIsNone(config.loss.name)
     self.assertFalse(hasattr(config.loss, "params"))
     # opt
     self.assertIsNone(config.optimizer.name)
     self.assertFalse(hasattr(config.optimizer, "params"))
     # norm
     self.assertIsNone(config.normalization)
 def test_empty_config_from_file(self):
     """
     Check that the config loaded from the file at `self.empty_config_path` is named "model" and has every other
     section unset.

     Dedicated assertions (`assertEqual`, `assertIsNone`) are used so that a failure reports the offending value.
     """
     config = ModelConfig(config_file=self.empty_config_path)
     self.assertEqual(config.name, "model")
     # activation
     self.assertIsNone(config.activation.name)
     self.assertFalse(hasattr(config.activation, "params"))
     # loss
     self.assertIsNone(config.loss.name)
     self.assertFalse(hasattr(config.loss, "params"))
     # opt
     self.assertIsNone(config.optimizer.name)
     self.assertFalse(hasattr(config.optimizer, "params"))
     # norm
     self.assertIsNone(config.normalization)
    def setUp(self) -> None:
        """
        Read the yaml config, attach a 'warmup_switch' scheduler to it and instantiate FcIPLLR from the result.
        """
        config_dict = read_yaml(CONFIG_PATH)
        architecture = config_dict['architecture']
        optimizer_params = config_dict['optimizer']['params']

        self.input_size = architecture['input_size']
        self.base_lr = optimizer_params['lr']
        self.n_warmup_steps = 2
        self.width = architecture['width']
        self.L = architecture['n_layers'] - 1

        optimizer_params['lr'] = self.base_lr
        scheduler_params = {
            'n_warmup_steps': self.n_warmup_steps,
            'calibrate_base_lr': False,
        }
        config_dict['scheduler'] = {
            'name': 'warmup_switch',
            'params': scheduler_params,
        }

        self.base_model_config = ModelConfig(config_dict)
        # NOTE(review): the model gets n_warmup_steps=4 whereas the scheduler params use self.n_warmup_steps (2)
        self.ipllr = FcIPLLR(self.base_model_config, n_warmup_steps=4)
    def setUp(self) -> None:
        """
        Load the two-layer net yaml config, build the model, generate the data and its loaders, and define the
        TensorBoard logger together with the checkpoint and early stopping callbacks.

        :raises Exception: if the yaml file cannot be parsed.
        """
        config_file = os.path.join('../../pytorch/configs',
                                   'wide_two_layer_net.yaml')
        with open(config_file, 'r') as stream:
            try:
                config_dict = yaml.safe_load(stream)
            except yaml.YAMLError as e:
                message = "Exception while reading yaml file {} : {}".format(
                    config_file, e)
                raise Exception(message)

        # parameters of the experiment
        r = 0.5
        k = 3
        d = 20
        self.n = 700
        self.n_train = 256
        self.n_val = 256  # n_test = n - (n_train + n_val)

        hidden_layer_dim = config_dict['architecture']['hidden_layer_dim']
        self.version = 'test_new_mac_n={}_d={}_m={}'.format(
            self.n, d, hidden_layer_dim)

        # config and net: the input size of the net is set to the data dimension d
        config_dict['architecture']['input_size'] = d
        self.config = ModelConfig(config_dict=config_dict)
        self.two_layer_net = TwoLayerNet(self.config, train_hidden=True)

        # generate data, then define the train/val/test data loaders
        ds = self._generate_data(k, r, d, self.n)
        self._set_data_loaders(ds, self.n, self.n_train, self.n_val)

        # tb logger
        self.tb_logger = TensorBoardLogger(
            save_dir=SAVE_DIR, version=self.version, name=NAME)

        # checkpoints callback, monitoring the validation accuracy
        checkpoints_name_template = '{epoch}_{val_accuracy:.3f}_{val_loss:.3f}_{val_auc:.3f}'
        checkpoints_dir = os.path.join(SAVE_DIR, NAME, self.version,
                                       'checkpoints')
        checkpoints_path = os.path.join(checkpoints_dir,
                                        checkpoints_name_template)
        self.checkpoint_callback = ModelCheckpoint(
            filepath=checkpoints_path,
            save_top_k=3,
            verbose=True,
            monitor='val_accuracy',
            mode='max',
            prefix='')

        # early stopping callback, monitoring the validation loss
        self.early_stopping_callback = EarlyStopping(
            monitor='val_loss', min_delta=1.0e-6, patience=5, mode='min')
    def setUp(self) -> None:
        """
        Load the two-layer net yaml config, build the model and wrap freshly generated random data in a DataLoader.

        :raises Exception: if the yaml file cannot be parsed.
        """
        config_file = os.path.join('../../pytorch/configs', 'wide_two_layer_net.yaml')
        with open(config_file, 'r') as stream:
            try:
                config_dict = yaml.safe_load(stream)
            except yaml.YAMLError as e:
                message = "Exception while reading yaml file {} : {}".format(config_file, e)
                raise Exception(message)

        # config and net
        self.config = ModelConfig(config_dict=config_dict)
        self.two_layer_net = TwoLayerNet(self.config, train_hidden=True)

        # random data whose dimension is the input size of the net
        input_dim = config_dict['architecture']['input_size']
        data = random.RandomData(k=4, r=1., d=input_dim, n=5000)
        data.generate_samples()

        self.ds = dataset.RandomDataset(data)
        self.data_loader = DataLoader(self.ds, shuffle=True, batch_size=BATCH_SIZE)
    def setUp(self) -> None:
        """
        Build a StandardFCIP from the yaml config with L = 4, a width of 1024 and an initializer mean set to 1.0.
        """
        config_dict = read_yaml(CONFIG_PATH)
        architecture = config_dict['architecture']
        optimizer_params = config_dict['optimizer']['params']

        self.input_size = architecture['input_size']
        self.base_lr = optimizer_params['lr']
        self.n_warmup_steps = 1

        # values fixed by this test case
        self.width = 1024
        self.L = 4
        self.mean = 1.0

        # overwrite the config with the values used by the test
        architecture['width'] = self.width
        architecture['n_layers'] = self.L + 1
        optimizer_params['lr'] = self.base_lr

        self.base_model_config = ModelConfig(config_dict)
        # the mean of the initializer is set on the config object, before the model is created
        self.base_model_config.initializer.params["mean"] = self.mean
        self.ip_non_centered = StandardFCIP(self.base_model_config)
    def setUp(self) -> None:
        """
        Build a ResNetMNIST from the resnet yaml config, split N_SAMPLES shuffled indexes between train and
        validation, and define the data loaders, the TensorBoard logger and the checkpoint callback.

        The unused local `n_val` of the previous version has been removed: the validation indexes are simply all the
        indexes left after the first `n_train` ones.
        """
        # define model
        config = ModelConfig(
            config_file=os.path.join(RESOURCES_DIR, 'resnet_config.yaml'))
        self.resnet = ResNetMNIST(config)

        # set up train and val data loaders from a random split of the indexes
        n_train = int(RATIO_TRAIN * N_SAMPLES)
        indexes = list(range(N_SAMPLES))
        np.random.shuffle(indexes)
        train_indexes = indexes[:n_train]
        val_indexes = indexes[n_train:]

        self.train_data_loader = self.resnet.train_dataloader(
            data_dir=DATA_DIR,
            download=False,
            batch_size=BATCH_SIZE,
            indexes=train_indexes)

        # NOTE(review): the validation loader is also built with `train_dataloader`, only the indexes differ -
        # presumably validation samples are held out from the training set; confirm
        self.val_data_loader = self.resnet.train_dataloader(
            data_dir=DATA_DIR,
            download=False,
            batch_size=BATCH_SIZE,
            indexes=val_indexes)

        self.test_dataloader = self.resnet.test_dataloader(
            data_dir=DATA_DIR, download=False, batch_size=BATCH_SIZE)

        # NOTE(review): `version` is not defined in this method - presumably a module-level name; confirm
        self.tb_logger = TensorBoardLogger(save_dir=SAVE_DIR,
                                           version=version,
                                           name=NAME)
        checkpoints_path = os.path.join(
            SAVE_DIR, NAME, version, 'checkpoints',
            '{epoch}_{val_accuracy:.3f}_{val_loss:.3f}_{val_auc:.3f}')
        self.checkpoint_callback = ModelCheckpoint(filepath=checkpoints_path,
                                                   save_top_k=3,
                                                   verbose=True,
                                                   monitor='val_accuracy',
                                                   mode='max',
                                                   prefix='')
Beispiel #13
0
    def setUp(self) -> None:
        """
        Build the base ModelConfig from the yaml file and instantiate StandardFCIP with a width of 0.
        """
        self.width = 0
        self.base_model_config = ModelConfig(read_yaml(CONFIG_PATH))

        self.standard_ip = StandardFCIP(self.base_model_config, self.width)
Beispiel #14
0
 def setUp(self) -> None:
     """Load the ModelConfig from the 'resnet_config.yaml' file of the resources directory."""
     resnet_config_path = os.path.join(RESOURCES_DIR, 'resnet_config.yaml')
     self.config = ModelConfig(config_file=resnet_config_path)
Beispiel #15
0
def main(activation="relu", base_lr=0.01, batch_size=512, dataset="mnist"):
    """
    Train N_TRIALS FCmuP models for two steps and analyse the ranks of their weights and (pre-)activations.

    For every trial the initial model and the models after the first and second training steps are kept. The initial
    weights and the first two updates are extracted from them, and the contributions after the first step are
    computed on the whole training set. Ranks are then computed with three methods ('svd_default', 'svd_tol' and
    'squared_tr'), their averages are saved to csv and plotted per layer, and the diversity of the index of the maximum
    pre-activation entry is saved to csv. Everything is written to FIGURES_DIR/<dataset>.

    Any exception raised during the run is caught and logged: it is NOT propagated to the caller.

    Fixes compared to the previous version:
      - `h1s` used to be filled with `h0` instead of `h1`, so that every statistic reported for h1 was in fact
        computed on h0;
      - 'cifar100' used to fall through to an unbound `load_data`; it now raises an explicit NotImplementedError
        (logged like any other error of the run).

    :param activation: name of the activation, written to the 'activation' section of the config.
    :param base_lr: learning rate written to the optimizer params of the config.
    :param batch_size: batch size of the training DataLoader.
    :param dataset: one of 'mnist', 'cifar10' ('cifar100' is accepted as a name but not implemented yet).
    """
    config_path = os.path.join(CONFIG_PATH, 'fc_ipllr_{}.yaml'.format(dataset))
    figures_dir = os.path.join(FIGURES_DIR, dataset)
    create_dir(figures_dir)
    log_path = os.path.join(figures_dir, 'log_muP_{}.txt'.format(activation))
    logger = set_up_logger(log_path)

    logger.info('Parameters of the run:')
    logger.info('activation = {}'.format(activation))
    logger.info('base_lr = {}'.format(base_lr))
    logger.info('batch_size = {:,}'.format(batch_size))
    logger.info('Random SEED : {:,}'.format(SEED))
    logger.info(
        'Number of random trials for each model : {:,}'.format(N_TRIALS))

    try:
        set_random_seeds(SEED)  # set random seed for reproducibility
        config_dict = read_yaml(config_path)

        # NOTE(review): `L` and `width` are not defined in this function - presumably module-level constants
        version = 'L={}_m={}_act={}_lr={}_bs={}'.format(
            L, width, activation, base_lr, batch_size)
        template_name = 'muP_{}_ranks_{}_' + version

        config_dict['architecture']['width'] = width
        config_dict['architecture']['n_layers'] = L + 1
        config_dict['optimizer']['params']['lr'] = base_lr
        config_dict['activation']['name'] = activation

        base_model_config = ModelConfig(config_dict)

        # Load data & define models
        logger.info('Loading data ...')
        if dataset == 'mnist':
            from utils.dataset.mnist import load_data
        elif dataset == 'cifar10':
            from utils.dataset.cifar10 import load_data
        elif dataset == 'cifar100':
            # TODO : add cifar100 to utils.dataset
            error = NotImplementedError(
                "dataset 'cifar100' is not supported yet: no data loading "
                "function is available for it")
            logger.error(error)
            raise error
        else:
            error = ValueError(
                "dataset must be one of ['mnist', 'cifar10', 'cifar100'] but was {}"
                .format(dataset))
            logger.error(error)
            raise error

        training_dataset, _ = load_data(download=False, flatten=True)
        train_data_loader = DataLoader(training_dataset,
                                       shuffle=True,
                                       batch_size=batch_size)
        batches = list(train_data_loader)

        # all the inputs of the training set, used later to compute the pre-activations
        full_x = torch.cat([a for a, _ in batches], dim=0)

        logger.info('Defining models')
        base_model_config.scheduler = None
        muPs = [FCmuP(base_model_config) for _ in range(N_TRIALS)]

        # the learning rate of the first param group only is scaled by (d + 1)
        for model in muPs:
            for param_group in model.optimizer.param_groups[:1]:
                param_group['lr'] = param_group['lr'] * (model.d + 1)

        # save initial models
        muPs_0 = [deepcopy(model) for model in muPs]

        # train model one step
        logger.info('Training model a first step (t=1)')
        x, y = batches[0]
        muPs_1 = []
        for model in muPs:
            train_model_one_step(model, x, y, normalize_first=True)
            muPs_1.append(deepcopy(model))

        # train models for a second step
        logger.info('Training model a second step (t=2)')
        x, y = batches[1]
        muPs_2 = []
        for model in muPs:
            train_model_one_step(model, x, y, normalize_first=True)
            muPs_2.append(deepcopy(model))

        # set eval mode for all models
        for models_of_trial in zip(muPs, muPs_0, muPs_1, muPs_2):
            for model in models_of_trial:
                model.eval()

        logger.info('Storing initial and update matrices')
        # define W0 and b0
        W0s = []
        b0s = []
        for muP_0 in muPs_0:
            W0, b0 = get_W0_dict(muP_0, normalize_first=True)
            W0s.append(W0)
            b0s.append(b0)

        # define Delta_W_1 and Delta_b_1
        Delta_W_1s = []
        Delta_b_1s = []
        for muP_0, muP_1 in zip(muPs_0, muPs_1):
            Delta_W_1, Delta_b_1 = get_Delta_W1_dict(muP_0,
                                                     muP_1,
                                                     normalize_first=True)
            Delta_W_1s.append(Delta_W_1)
            Delta_b_1s.append(Delta_b_1)

        # define Delta_W_2 and Delta_b_2
        Delta_W_2s = []
        Delta_b_2s = []
        for muP_1, muP_2 in zip(muPs_1, muPs_2):
            Delta_W_2, Delta_b_2 = get_Delta_W2_dict(muP_1,
                                                     muP_2,
                                                     normalize_first=True)
            Delta_W_2s.append(Delta_W_2)
            Delta_b_2s.append(Delta_b_2)

        # contributions after first step, computed on the full batch
        h0s = []
        delta_h_1s = []
        h1s = []
        x1s = []
        for i in range(N_TRIALS):
            h0, delta_h_1, h1, x1 = get_contributions_1(full_x,
                                                        muPs_1[i],
                                                        W0s[i],
                                                        b0s[i],
                                                        Delta_W_1s[i],
                                                        Delta_b_1s[i],
                                                        normalize_first=True)
            h0s.append(h0)
            delta_h_1s.append(delta_h_1)
            h1s.append(h1)  # bug fix: h0 used to be appended here
            x1s.append(x1)

        # tolerance of the 'svd_tol' ranks, also passed to the plotting functions
        svd_tol = 1e-7
        trials = range(N_TRIALS)

        # ranks of initial weight matrices and first two updates
        logger.info('Computing ranks of weight matrices ...')
        weight_ranks_dfs_dict = dict()
        weight_ranks_dfs_dict['svd_default'] = [
            get_svd_ranks_weights(W0s[i],
                                  Delta_W_1s[i],
                                  Delta_W_2s[i],
                                  L,
                                  tol=None) for i in trials
        ]
        weight_ranks_dfs_dict['svd_tol'] = [
            get_svd_ranks_weights(W0s[i],
                                  Delta_W_1s[i],
                                  Delta_W_2s[i],
                                  L,
                                  tol=svd_tol) for i in trials
        ]
        weight_ranks_dfs_dict['squared_tr'] = [
            get_square_trace_ranks_weights(W0s[i], Delta_W_1s[i],
                                           Delta_W_2s[i], L) for i in trials
        ]

        weight_ranks_df_dict = {
            key: get_concatenated_ranks_df(dfs)
            for key, dfs in weight_ranks_dfs_dict.items()
        }
        avg_ranks_df_dict = {
            key: get_avg_ranks_dfs(df)
            for key, df in weight_ranks_df_dict.items()
        }

        logger.info('Saving weights ranks data frames to csv ...')
        for key, avg_ranks_df in avg_ranks_df_dict.items():
            logger.info(key)
            logger.info('\n' + str(avg_ranks_df) + '\n\n')
            csv_path = os.path.join(
                figures_dir,
                template_name.format(key, 'weights') + '.csv')
            avg_ranks_df.to_csv(csv_path, header=True, index=True)

        ranks_dfs = [
            weight_ranks_df_dict['svd_default'],
            weight_ranks_df_dict['svd_tol'], weight_ranks_df_dict['squared_tr']
        ]

        # plot weights ranks: one figure per matrix
        logger.info('Plotting weights ranks')
        for matrix_name in ('W0', 'Delta_W_1', 'Delta_W_2'):
            plt.figure(figsize=(12, 6))
            plot_weights_ranks_vs_layer(matrix_name,
                                        ranks_dfs,
                                        svd_tol,
                                        L,
                                        width,
                                        base_lr,
                                        batch_size,
                                        y_scale='log')
            plt.savefig(
                os.path.join(
                    figures_dir,
                    template_name.format(matrix_name, 'weights') + '.png'))

        # ranks of the pre-activations
        logger.info('Computing ranks of (pre-)activations ...')
        act_ranks_dfs_dict = dict()
        act_ranks_dfs_dict['svd_default'] = [
            get_svd_ranks_acts(h0s[i],
                               delta_h_1s[i],
                               h1s[i],
                               x1s[i],
                               L,
                               tol=None) for i in trials
        ]
        act_ranks_dfs_dict['svd_tol'] = [
            get_svd_ranks_acts(h0s[i],
                               delta_h_1s[i],
                               h1s[i],
                               x1s[i],
                               L,
                               tol=svd_tol) for i in trials
        ]
        act_ranks_dfs_dict['squared_tr'] = [
            get_square_trace_ranks_acts(h0s[i], delta_h_1s[i], h1s[i], x1s[i],
                                        L) for i in trials
        ]

        act_ranks_df_dict = {
            key: get_concatenated_ranks_df(dfs)
            for key, dfs in act_ranks_dfs_dict.items()
        }
        avg_ranks_df_dict = {
            key: get_avg_ranks_dfs(df)
            for key, df in act_ranks_df_dict.items()
        }

        logger.info('Saving (pre-)activation ranks data frames to csv ...')
        for key, avg_ranks_df in avg_ranks_df_dict.items():
            logger.info(key)
            logger.info('\n' + str(avg_ranks_df) + '\n\n')
            csv_path = os.path.join(
                figures_dir,
                template_name.format(key, 'acts') + '.csv')
            avg_ranks_df.to_csv(csv_path, header=True, index=True)

        ranks_dfs = [
            act_ranks_df_dict['svd_default'], act_ranks_df_dict['svd_tol'],
            act_ranks_df_dict['squared_tr']
        ]

        # plot (pre-)activation ranks: one figure per quantity
        logger.info('Plotting (pre-)activation ranks')
        for act_name in ('h0', 'h1', 'x1', 'delta_h_1'):
            plt.figure(figsize=(12, 6))
            plot_acts_ranks_vs_layer(act_name,
                                     ranks_dfs,
                                     svd_tol,
                                     L,
                                     width,
                                     base_lr,
                                     batch_size,
                                     y_scale='log')
            plt.savefig(
                os.path.join(figures_dir,
                             template_name.format(act_name, 'acts') + '.png'))

        # diversity in terms of the index of the maximum entry
        logger.info(
            'Computing diversity of the maximum entry of pre-activations...')
        max_acts_diversity_dfs = [
            get_max_acts_diversity(h0s[i], delta_h_1s[i], h1s[i], L)
            for i in trials
        ]
        max_acts_diversity_df = get_concatenated_ranks_df(
            max_acts_diversity_dfs)
        avg_max_acts_diversity_df = get_avg_ranks_dfs(max_acts_diversity_df)
        logger.info('Diversity of the maximum activation index df:')
        logger.info(str(avg_max_acts_diversity_df))
        avg_max_acts_diversity_df.to_csv(os.path.join(
            figures_dir, 'muP_max_acts_' + version + '.csv'),
                                         header=True,
                                         index=True)

    except Exception as e:
        # top-level boundary of the script: the error is logged with its traceback and not re-raised
        logger.exception("Exception when running the script : {}".format(e))
Beispiel #16
0
def main(activation="relu",
         n_steps=300,
         base_lr=0.01,
         batch_size=512,
         dataset="mnist"):
    """
    Trains N_TRIALS muP models in three variants (plain, with normalize_first=True, and with normalize_first=True
    plus a rescaled learning rate for the first parameter group) and saves the loss and chi curves as figures.

    The names width, L, renorm_first, SEED, N_TRIALS, CONFIG_PATH and FIGURES_DIR are not defined in this function
    and are read from the enclosing module scope.

    Any exception raised while running the experiment is logged with its traceback and swallowed, so this function
    always returns None.

    :param activation: name of the activation, written to config_dict['activation']['name'].
    :param n_steps: number of steps passed to collect_training_losses for every model.
    :param base_lr: learning rate written to config_dict['optimizer']['params']['lr'].
    :param batch_size: batch size of the training DataLoader.
    :param dataset: one of 'mnist' or 'cifar10'. 'cifar100' is recognised but has no loader yet and is reported
    as not implemented; any other value is reported as a ValueError.
    """
    config_path = os.path.join(CONFIG_PATH, 'fc_ipllr_{}.yaml'.format(dataset))
    figures_dir = os.path.join(FIGURES_DIR, dataset)
    create_dir(figures_dir)
    log_path = os.path.join(figures_dir, 'log_muP_{}.txt'.format(activation))
    logger = set_up_logger(log_path)

    logger.info('Parameters of the run:')
    logger.info('activation = {}'.format(activation))
    logger.info('n_steps = {:,}'.format(n_steps))
    logger.info('base_lr = {}'.format(base_lr))
    logger.info('batch_size = {:,}'.format(batch_size))
    logger.info('dataset = {}'.format(dataset))
    logger.info('Random SEED : {:,}'.format(SEED))
    logger.info(
        'Number of random trials for each model : {:,}'.format(N_TRIALS))

    try:
        set_random_seeds(SEED)  # set random seed for reproducibility
        config_dict = read_yaml(config_path)

        fig_name_template = 'muP_{}_{}_L={}_m={}_act={}_lr={}_bs={}.png'

        # override the values read from the yaml file with the parameters of the run
        config_dict['architecture']['width'] = width
        config_dict['architecture']['n_layers'] = L + 1
        config_dict['optimizer']['params']['lr'] = base_lr
        config_dict['activation']['name'] = activation

        base_model_config = ModelConfig(config_dict)

        # Load data & define models
        logger.info('Loading data ...')
        if dataset == 'mnist':
            from utils.dataset.mnist import load_data
        elif dataset == 'cifar10':
            from utils.dataset.cifar10 import load_data
        elif dataset == 'cifar100':
            # TODO : add cifar100 to utils.dataset. Once a loader exists, the output size (100) has to be written
            #  to config_dict BEFORE base_model_config is built above.
            # Fail explicitly here: load_data is a local name that no import has bound in this branch, so
            # falling through would only produce an obscure UnboundLocalError further down.
            error = NotImplementedError(
                "dataset 'cifar100' is not supported yet : no loader is available in utils.dataset"
            )
            logger.error(error)
            raise error
        else:
            error = ValueError(
                "dataset must be one of ['mnist', 'cifar10', 'cifar100'] but was {}"
                .format(dataset))
            logger.error(error)
            raise error

        training_dataset, test_dataset = load_data(download=False,
                                                   flatten=True)
        train_data_loader = DataLoader(training_dataset,
                                       shuffle=True,
                                       batch_size=batch_size)
        # materialize the batches once so that every model is trained on the same sequence of batches
        batches = list(train_data_loader)

        logger.info('Defining models')
        base_model_config.scheduler = None
        muPs = [FCmuP(base_model_config) for _ in range(N_TRIALS)]
        muPs_renorm = [FCmuP(base_model_config) for _ in range(N_TRIALS)]
        muPs_renorm_scale_lr = [
            FCmuP(base_model_config) for _ in range(N_TRIALS)
        ]

        # multiply the learning rate of the first parameter group by (d + 1) for the "scale_lr" variant
        for muP in muPs_renorm_scale_lr:
            for i, param_group in enumerate(muP.optimizer.param_groups):
                if i == 0:
                    param_group['lr'] = param_group['lr'] * (muP.d + 1)

        logger.info('Copying parameters of base muP')
        for i in range(N_TRIALS):
            muPs_renorm[i].copy_initial_params_from_model(muPs[i])
            muPs_renorm_scale_lr[i].copy_initial_params_from_model(muPs[i])

            muPs_renorm[i].initialize_params()
            muPs_renorm_scale_lr[i].initialize_params()

        # the three variants are trained one after the other, in this order
        results = dict()
        logger.info('Generating training results ...')
        results['muP'] = [
            collect_training_losses(muPs[i],
                                    batches,
                                    n_steps,
                                    normalize_first=False)
            for i in range(N_TRIALS)
        ]

        results['muP_renorm'] = [
            collect_training_losses(muPs_renorm[i],
                                    batches,
                                    n_steps,
                                    normalize_first=True)
            for i in range(N_TRIALS)
        ]

        results['muP_renorm_scale_lr'] = [
            collect_training_losses(muPs_renorm_scale_lr[i],
                                    batches,
                                    n_steps,
                                    normalize_first=True)
            for i in range(N_TRIALS)
        ]

        mode = 'training'
        # each result is indexed as (losses, chis): split them into two dicts keyed by model variant
        losses = {
            name: [r[0] for r in res]
            for name, res in results.items()
        }
        chis = {name: [r[1] for r in res] for name, res in results.items()}

        # Plot losses and derivatives
        logger.info('Saving figures at {}'.format(figures_dir))
        key = 'loss'
        plt.figure(figsize=(12, 8))
        plot_losses_models(losses,
                           key=key,
                           L=L,
                           width=width,
                           activation=activation,
                           lr=base_lr,
                           batch_size=batch_size,
                           mode=mode,
                           normalize_first=renorm_first,
                           marker=None,
                           name='muP')
        plt.ylim(0, 2.5)
        plt.savefig(
            os.path.join(
                figures_dir,
                fig_name_template.format(mode, key, L, width, activation,
                                         base_lr, batch_size)))

        key = 'chi'
        plt.figure(figsize=(12, 8))
        plot_losses_models(chis,
                           key=key,
                           L=L,
                           width=width,
                           activation=activation,
                           lr=base_lr,
                           batch_size=batch_size,
                           mode=mode,
                           marker=None,
                           name='muP')
        plt.savefig(
            os.path.join(
                figures_dir,
                fig_name_template.format(mode, key, L, width, activation,
                                         base_lr, batch_size)))

    except Exception as e:
        # top-level boundary of the script : log the full traceback instead of crashing
        logger.exception("Exception when running the script : {}".format(e))
Beispiel #17
0
    def _run_trial(self, idx):
        """
        Runs the train / validation / test pipeline of a single trial and pickles model.results in the trial
        directory. The trial is skipped (with a warning) if its directory already exists.

        :param idx: zero-based index of the trial. The trial directory is named 'trial_{idx + 1}' and the random
        seed used is self.trial_seeds[idx].
        :raises Exception: any exception raised by the pipeline is re-raised with its original type and traceback,
        after model.results has been dumped to the trial directory and the error has been logged.
        """
        trial_name = 'trial_{}'.format(idx + 1)
        self.trial_dir = os.path.join(
            self.base_experiment_path,
            trial_name)  # folder to hold trial results

        if os.path.exists(self.trial_dir):
            # run trial only if it doesn't already exist; report the same 1-based number as the directory name
            logging.warning(
                "Directory for trial {:,} of experiment {} already exists".
                format(idx + 1, self.model_config))
            return

        create_dir(self.trial_dir)  # directory to save the trial
        set_random_seeds(
            self.trial_seeds[idx])  # set random seed for the trial

        self._set_tb_logger_and_callbacks(
            trial_name)  # tb logger, checkpoints and early stopping

        log_dir = os.path.join(
            self.trial_dir,
            self.LOG_NAME)  # define path to save the logs of the trial
        logger = set_up_logger(log_dir)

        config = ModelConfig(
            config_dict=self.config_dict
        )  # define the config as a class to pass to the model
        model = self.model(config)  # define the model

        logger.info('----- Trial {:,} ----- with model config {}\n'.format(
            idx + 1, self.model_config))
        self._log_experiment_info(len(self.train_dataset),
                                  len(self.val_dataset),
                                  len(self.test_dataset), model.std)
        logger.info('Random seed used for the script : {:,}'.format(
            self.SEED))
        logger.info('Number of model parameters : {:,}'.format(
            model.count_parameters()))
        logger.info('Model architecture :\n{}\n'.format(model))

        # file holding all training, val and test results of the trial
        results_path = os.path.join(self.trial_dir, self.RESULTS_FILE)

        try:
            # training and validation pipeline
            trainer = pl.Trainer(
                max_epochs=self.max_epochs,
                max_steps=self.max_steps,
                logger=self.tb_logger,
                checkpoint_callback=self.checkpoint_callback,
                num_sanity_val_steps=0,
                early_stop_callback=self.early_stopping_callback)
            trainer.fit(model=model,
                        train_dataloader=self.train_data_loader,
                        val_dataloaders=self.val_data_loader)

            # test pipeline
            test_results = trainer.test(
                model=model, test_dataloaders=self.test_data_loader)
            logger.info('Test results :\n{}\n'.format(test_results))

            # save all training, val and test results to pickle file
            with open(results_path, 'wb') as file:
                pickle.dump(model.results, file)

        except Exception as e:
            # dump and save results before exiting
            with open(results_path, 'wb') as file:
                pickle.dump(model.results, file)
            logger.warning('model results dumped before interruption')
            logger.exception(
                "Exception while running the train-val-test pipeline : {}".
                format(e))
            # bare raise keeps the original exception type and traceback (raise Exception(e) would lose both)
            raise
Beispiel #18
0
def main(activation="relu",
         n_steps=300,
         base_lr=0.01,
         batch_size=512,
         dataset="mnist"):
    """
    Trains N_TRIALS IP-LLR models in three calibrated variants (calibration with normalize_first=False, with
    normalize_first=True, and with normalize_first=True plus a rescaled first warm learning rate) and saves the
    loss and chi curves as figures.

    The names width, L, n_warmup_steps, renorm_first, SEED, N_TRIALS, CONFIG_PATH and FIGURES_DIR are not defined
    in this function and are read from the enclosing module scope.

    Any exception raised while running the experiment is logged with its traceback and swallowed, so this function
    always returns None.

    :param activation: name of the activation, written to config_dict['activation']['name'].
    :param n_steps: number of steps passed to collect_training_losses for every model.
    :param base_lr: learning rate written to config_dict['optimizer']['params']['lr'].
    :param batch_size: batch size of the training DataLoader.
    :param dataset: one of 'mnist' or 'cifar10'. 'cifar100' is recognised but has no loader yet and is reported
    as not implemented; any other value is reported as a ValueError.
    """
    config_path = os.path.join(CONFIG_PATH, 'fc_ipllr_{}.yaml'.format(dataset))
    figures_dir = os.path.join(FIGURES_DIR, dataset)
    create_dir(figures_dir)
    log_path = os.path.join(figures_dir, 'log_ipllr_{}.txt'.format(activation))
    logger = set_up_logger(log_path)

    logger.info('Parameters of the run:')
    logger.info('activation = {}'.format(activation))
    logger.info('n_steps = {:,}'.format(n_steps))
    logger.info('base_lr = {}'.format(base_lr))
    logger.info('batch_size = {:,}'.format(batch_size))
    logger.info('dataset = {}'.format(dataset))
    logger.info('Random SEED : {:,}'.format(SEED))
    logger.info(
        'Number of random trials for each model : {:,}'.format(N_TRIALS))

    try:
        set_random_seeds(SEED)  # set random seed for reproducibility
        config_dict = read_yaml(config_path)

        fig_name_template = 'IPLLRs_1_last_small_{}_{}_L={}_m={}_act={}_lr={}_bs={}.png'

        # override the values read from the yaml file with the parameters of the run
        config_dict['architecture']['width'] = width
        config_dict['architecture']['n_layers'] = L + 1
        config_dict['optimizer']['params']['lr'] = base_lr
        config_dict['activation']['name'] = activation
        config_dict['scheduler'] = {
            'name': 'warmup_switch',
            'params': {
                'n_warmup_steps': n_warmup_steps,
                'calibrate_base_lr': True,
                'default_calibration': False
            }
        }

        # Load data & define models
        logger.info('Loading data ...')
        if dataset == 'mnist':
            from utils.dataset.mnist import load_data
        elif dataset == 'cifar10':
            from utils.dataset.cifar10 import load_data
        elif dataset == 'cifar100':
            # TODO : add cifar100 to utils.dataset
            # Fail explicitly here: load_data is a local name that no import has bound in this branch, so
            # falling through would only produce an obscure UnboundLocalError further down.
            error = NotImplementedError(
                "dataset 'cifar100' is not supported yet : no loader is available in utils.dataset"
            )
            logger.error(error)
            raise error
        else:
            error = ValueError(
                "dataset must be one of ['mnist', 'cifar10', 'cifar100'] but was {}"
                .format(dataset))
            logger.error(error)
            raise error

        training_dataset, test_dataset = load_data(download=False,
                                                   flatten=True)
        train_data_loader = DataLoader(training_dataset,
                                       shuffle=True,
                                       batch_size=batch_size)
        # materialize the batches once so that every model is calibrated and trained on the same batches
        batches = list(train_data_loader)
        logger.info('Number of batches (steps) per epoch : {:,}'.format(
            len(batches)))
        logger.info('Number of epochs : {:,}'.format(n_steps // len(batches)))

        # base models are built without lr calibration; they only provide the initial parameters copied below
        config_dict['scheduler']['params']['calibrate_base_lr'] = False
        config = ModelConfig(config_dict)

        logger.info('Defining models')
        ipllrs = [FcIPLLR(config) for _ in range(N_TRIALS)]

        # the three variants below are built with lr calibration enabled
        config_dict['scheduler']['params']['calibrate_base_lr'] = True
        config = ModelConfig(config_dict)
        ipllrs_calib = [
            FcIPLLR(config, lr_calibration_batches=batches)
            for _ in range(N_TRIALS)
        ]
        ipllrs_calib_renorm = [
            FcIPLLR(config, lr_calibration_batches=batches)
            for _ in range(N_TRIALS)
        ]
        ipllrs_calib_renorm_scale_lr = [
            FcIPLLR(config, lr_calibration_batches=batches)
            for _ in range(N_TRIALS)
        ]

        logger.info('Copying parameters of base ipllr')
        for i in range(N_TRIALS):
            ipllrs_calib[i].copy_initial_params_from_model(ipllrs[i])
            ipllrs_calib_renorm[i].copy_initial_params_from_model(ipllrs[i])
            ipllrs_calib_renorm_scale_lr[i].copy_initial_params_from_model(
                ipllrs[i])

            ipllrs_calib[i].initialize_params()
            ipllrs_calib_renorm[i].initialize_params()
            ipllrs_calib_renorm_scale_lr[i].initialize_params()

        # Make sure calibration takes into account normalization
        logger.info('Recalibrating lrs with new initialisation')
        for ipllr in ipllrs_calib:
            initial_base_lrs = ipllr.scheduler.calibrate_base_lr(
                ipllr, batches=batches, normalize_first=False)
            ipllr.scheduler._set_param_group_lrs(initial_base_lrs)

        for ipllr in ipllrs_calib_renorm:
            initial_base_lrs = ipllr.scheduler.calibrate_base_lr(
                ipllr, batches=batches, normalize_first=True)
            ipllr.scheduler._set_param_group_lrs(initial_base_lrs)

        for ipllr in ipllrs_calib_renorm_scale_lr:
            initial_base_lrs = ipllr.scheduler.calibrate_base_lr(
                ipllr, batches=batches, normalize_first=True)
            ipllr.scheduler._set_param_group_lrs(initial_base_lrs)

        # scale lr of first layer if needed
        for ipllr in ipllrs_calib_renorm_scale_lr:
            ipllr.scheduler.warm_lrs[0] = ipllr.scheduler.warm_lrs[0] * (
                ipllr.d + 1)

        # with calibration : the three variants are trained one after the other, in this order
        results = dict()
        logger.info('Generating training results ...')
        results['ipllr_calib'] = [
            collect_training_losses(ipllrs_calib[i],
                                    batches,
                                    n_steps,
                                    normalize_first=False)
            for i in range(N_TRIALS)
        ]

        results['ipllr_calib_renorm'] = [
            collect_training_losses(ipllrs_calib_renorm[i],
                                    batches,
                                    n_steps,
                                    normalize_first=True)
            for i in range(N_TRIALS)
        ]

        results['ipllr_calib_renorm_scale_lr'] = [
            collect_training_losses(ipllrs_calib_renorm_scale_lr[i],
                                    batches,
                                    n_steps,
                                    normalize_first=True)
            for i in range(N_TRIALS)
        ]

        mode = 'training'
        # each result is indexed as (losses, chis): split them into two dicts keyed by model variant
        losses = {
            name: [r[0] for r in res]
            for name, res in results.items()
        }
        chis = {name: [r[1] for r in res] for name, res in results.items()}

        # Plot losses and derivatives
        logger.info('Saving figures at {}'.format(figures_dir))
        key = 'loss'
        plt.figure(figsize=(12, 8))
        plot_losses_models(losses,
                           key=key,
                           L=L,
                           width=width,
                           activation=activation,
                           lr=base_lr,
                           batch_size=batch_size,
                           mode=mode,
                           normalize_first=renorm_first,
                           marker=None,
                           name='IPLLR')

        plt.savefig(
            os.path.join(
                figures_dir,
                fig_name_template.format(mode, key, L, width, activation,
                                         base_lr, batch_size)))

        key = 'chi'
        plt.figure(figsize=(12, 8))
        plot_losses_models(chis,
                           key=key,
                           L=L,
                           width=width,
                           activation=activation,
                           lr=base_lr,
                           batch_size=batch_size,
                           mode=mode,
                           marker=None,
                           name='IPLLR')
        plt.savefig(
            os.path.join(
                figures_dir,
                fig_name_template.format(mode, key, L, width, activation,
                                         base_lr, batch_size)))

    except Exception as e:
        # top-level boundary of the script : log the full traceback instead of crashing
        logger.exception("Exception when running the script : {}".format(e))
Beispiel #19
0
 def setUp(self) -> None:
     """
     Builds a ModelConfig from the 'resnet_config.yaml' file located in RESOURCES_DIR and stores the ResNetMNIST
     instance created from it in self.resnet.
     """
     config = ModelConfig(
         config_file=os.path.join(RESOURCES_DIR, 'resnet_config.yaml'))
     self.resnet = ResNetMNIST(config)