Example #1
    def _initOptimizer(self):
        modelParallel = self.getModelParallel()
        args = self.getArgs()

        optimizer = SGD(modelParallel.parameters(),
                        args.learning_rate,
                        momentum=args.momentum,
                        weight_decay=args.weight_decay)
        # load optimizer pre-trained state dict if exists
        if self.optimizerStateDict:
            optimizer.load_state_dict(self.optimizerStateDict)

        return optimizer
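
# A hedged companion sketch (not from the original example): one way the
# `optimizerStateDict` restored by `_initOptimizer` above can be produced.
# The checkpoint path and model are hypothetical; the save/load calls are
# standard PyTorch API.
import torch
from torch import nn
from torch.optim import SGD

model = nn.Linear(8, 2)
optimizer = SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)

torch.save(optimizer.state_dict(), "optimizer_checkpoint.pt")  # persist state
state_dict = torch.load("optimizer_checkpoint.pt")             # in a later run
optimizer.load_state_dict(state_dict)                          # resume momentum buffers, etc.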
Example #2
class RestrictedBoltzmannMachines(nn.Module, ObservableData):
    '''
    Restricted Boltzmann Machines (RBM).
    
    According to graph theory, the structure of an RBM corresponds to 
    a complete bipartite graph, a special kind of bipartite graph in 
    which every node in the visible layer is connected to every node 
    in the hidden layer. Based on statistical mechanics and 
    thermodynamics (Ackley, D. H., Hinton, G. E., & Sejnowski, T. J. 1985), 
    the state of this structure can be described by an energy function.

    In relation to RBMs, Contrastive Divergence (CD) is a method for 
    approximating the gradients of the log-likelihood (Hinton, G. E. 2002).
    The algorithm draws a distinction between a positive phase and a 
    negative phase. Conceptually, the positive phase is to the negative 
    phase what waking is to sleeping.

    The procedure is similar to the Markov Chain Monte Carlo (MCMC) method.
    However, unlike MCMC, the visible variables are not randomly initialized: 
    the observed data points in the training dataset are assigned to the 
    visible layer first. Then, as in Gibbs sampling, drawing samples from the 
    hidden and visible variables is repeated `k` times. Empirically (and 
    surprisingly), `k = 1` often suffices.
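
    In symbols, with visible units `v`, hidden units `h`, weight matrix `W`, 
    and biases `b_v` and `b_h`, the energy of a joint configuration is 
    `E(v, h) = -b_v^T v - b_h^T h - v^T W h`, and CD approximates the 
    gradient of the log-likelihood with respect to `W` by the difference 
    `<v h^T>_data - <v h^T>_reconstruction`, which `forward` accumulates below.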

    **Note** that this class supports only eager (imperative) execution on 
    `torch.Tensor` data; symbolic or hybrid execution is not supported.

    References:
        - Ackley, D. H., Hinton, G. E., & Sejnowski, T. J. (1985). A learning algorithm for Boltzmann machines. Cognitive science, 9(1), 147-169.
        - Hinton, G. E. (2002). Training products of experts by minimizing contrastive divergence. Neural computation, 14(8), 1771-1800.
        - Le Roux, N., & Bengio, Y. (2008). Representational power of restricted Boltzmann machines and deep belief networks. Neural computation, 20(6), 1631-1649.
    '''

    # The list of losses.
    __loss_arr = []
    # Learning rate.
    __learning_rate = 0.5
    # Batch size in learning.
    __batch_size = 0
    # Batch size in inference (recursive learning or not).
    __r_batch_size = 0

    def __init__(
        self,
        computable_loss,
        initializer_f=None,
        optimizer_f=None,
        visible_activation=torch.nn.Sigmoid(),
        hidden_activation=torch.nn.Sigmoid(),
        visible_dim=1000,
        hidden_dim=100,
        learning_rate=0.005,
        visible_dropout_rate=0.0,
        hidden_dropout_rate=0.0,
        visible_batch_norm=None,
        hidden_batch_norm=None,
        regularizatable_data_list=[],
        ctx="cpu",
    ):
        '''
        Init.
        
        Args:
            computable_loss:            is-a `ComputableLoss`.
            initializer_f:              `Callable` that initializes the weight `torch.Tensor` of the model in-place. If `None`, the weights are drawn from the Xavier distribution.
            optimizer_f:                `Callable` that receives `self.parameters()` and returns a `torch.optim.Optimizer`. If `None`, `SGD` is used.
            visible_activation:         Activation function (e.g. `torch.nn.Sigmoid()`) in the visible layer, or `"identity"` / `"identity_adjusted"`.
            hidden_activation:          Activation function (e.g. `torch.nn.Sigmoid()`) in the hidden layer, or `"identity"` / `"identity_adjusted"`.
            visible_dim:                `int` of dimension in the visible layer.
            hidden_dim:                 `int` of dimension in the hidden layer.
            learning_rate:              `float` of learning rate.
            visible_dropout_rate:       `float` of dropout rate in the visible layer.
            hidden_dropout_rate:        `float` of dropout rate in the hidden layer.
            visible_batch_norm:         `torch.nn.BatchNorm1d` in the visible layer, or `None`.
            hidden_batch_norm:          `torch.nn.BatchNorm1d` in the hidden layer, or `None`.
            regularizatable_data_list:  `list` of `RegularizatableData`s.
            ctx:                        Device string such as `"cpu"` or `"cuda"`.
        '''
        super(RestrictedBoltzmannMachines, self).__init__()

        for v in regularizatable_data_list:
            if not isinstance(v, RegularizatableData):
                raise TypeError(
                    "The type of values of `regularizatable_data_list` must be `RegularizatableData`."
                )
        self.__regularizatable_data_list = regularizatable_data_list

        self.__computable_loss = computable_loss
        self.visible_activation = visible_activation
        self.hidden_activation = hidden_activation

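        # Single linear map (weights W and hidden bias) from the visible to
        # the hidden layer; `forward` reuses this weight matrix for the
        # downward h -> v pass.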
        self.__visible_unit = nn.Linear(
            visible_dim,
            hidden_dim,
            bias=True,
        )

        self.visible_dropout_forward = None
        if visible_dropout_rate > 0:
            self.visible_dropout_forward = nn.Dropout(p=visible_dropout_rate)

        self.hidden_dropout_forward = None
        if hidden_dropout_rate > 0:
            self.hidden_dropout_forward = nn.Dropout(p=hidden_dropout_rate)

        self.visible_batch_norm = visible_batch_norm
        self.hidden_batch_norm = hidden_batch_norm

        self.__ctx = ctx
        self.to(self.__ctx)

        if initializer_f is not None:
            # The callable is expected to initialize the weight tensor in-place.
            initializer_f(self.__visible_unit.weight)
        else:
            torch.nn.init.xavier_normal_(
                self.__visible_unit.weight, gain=1.0)

        # Set the learning rate before building the optimizer, so that `SGD`
        # uses the value passed in rather than the class-level default.
        self.__learning_rate = learning_rate

        if optimizer_f is not None:
            self.optimizer = optimizer_f(self.parameters())
        else:
            self.optimizer = SGD(
                self.parameters(),
                lr=self.__learning_rate,
            )

        self.__loss_arr = np.array([])

        logger = getLogger("accelbrainbase")
        self.__logger = logger

        self.__loss_list = []
        self.__test_loss_list = []

        self.epoch = 0

    def learn(self, iteratable_data):
        '''
        Learn samples drawn by `IteratableData.generate_learned_samples()`.

        Args:
            iteratable_data:     is-a `IteratableData`.

        '''
        if not isinstance(iteratable_data, IteratableData):
            raise TypeError(
                "The type of `iteratable_data` must be `IteratableData`.")

        self.__loss_list = []
        self.__test_loss_list = []

        try:
            epoch = self.epoch
            iter_n = 0
            for observed_arr, label_arr, test_observed_arr, test_label_arr in iteratable_data.generate_learned_samples(
            ):
                self.batch_size = observed_arr.shape[0]
                observed_arr = observed_arr.reshape((self.batch_size, -1))
                test_observed_arr = test_observed_arr.reshape(
                    (test_observed_arr.shape[0], -1))

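                # One CD-style update: `forward` reconstructs the visible
                # activities, and the gradient of the reconstruction loss
                # drives the SGD step.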
                self.optimizer.zero_grad()
                visible_activity_arr = self.inference(observed_arr)
                loss = self.compute_loss(observed_arr, visible_activity_arr)
                loss.backward()
                self.optimizer.step()
                self.regularize()

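                # Evaluate on held-out data once per epoch
                # (iter_n / epochs iterations make up one epoch).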
                if (iter_n + 1) % int(
                        iteratable_data.iter_n / iteratable_data.epochs) == 0:
                    with torch.inference_mode():
                        test_visible_activity_arr = self.inference(
                            test_observed_arr)
                        test_loss = self.compute_loss(
                            test_observed_arr, test_visible_activity_arr)
                    _loss = loss.to('cpu').detach().numpy().copy()
                    _test_loss = test_loss.to('cpu').detach().numpy().copy()

                    self.__loss_list.append(_loss)
                    self.__test_loss_list.append(_test_loss)
                    self.__logger.debug("Epoch: " + str(epoch + 1) +
                                        " Train loss: " +
                                        str(self.__loss_list[-1]) +
                                        " Test loss: " +
                                        str(self.__test_loss_list[-1]))
                    epoch += 1
                iter_n += 1

        except KeyboardInterrupt:
            self.__logger.debug("Interrupt.")

        self.__logger.debug("end. ")

        self.__loss_arr = np.c_[
            np.array(self.__loss_list[:len(self.__test_loss_list)]),
            np.array(self.__test_loss_list)]
        self.epoch = epoch

    def inference(self, observed_arr):
        '''
        Infer samples drawn by `IteratableData.generate_inferenced_samples()`.

        Args:
            observed_arr:   rank-2 `torch.Tensor` of observed data points.
                            The shape is: (batch size, feature points)

        Returns:
            `torch.Tensor` of inferenced feature points (the reconstructed visible activities).
        '''
        return self(observed_arr)

    def compute_loss(self, pred_arr, labeled_arr):
        '''
        Compute loss.

        Args:
            pred_arr:       `torch.Tensor` of predictions.
            labeled_arr:    `torch.Tensor` of labels.

        Returns:
            loss.
        '''
        return self.__computable_loss(pred_arr, labeled_arr)

    def extract_learned_dict(self):
        '''
        Extract (pre-) learned parameters.

        Returns:
            `dict` of the parameters.
        '''
        # `state_dict` is computed once and shallow-copied into a plain `dict`.
        return dict(self.state_dict())

    def forward(self, x):
        '''
        Forward propagation.

        Args:
            x:      `torch.Tensor` of observed data points.

        Returns:
            `torch.Tensor` of the reconstructed visible activities.
        '''
        self.batch_size = x.shape[0]
        x = x.reshape((self.batch_size, -1))

        self.__visible_activity_arr = x

        x = self.__visible_unit(x)

        if self.visible_activation == "identity_adjusted":
            x = x / torch.sum(torch.ones_like(x))
        elif self.visible_activation != "identity":
            x = self.visible_activation(x)

        if self.visible_dropout_forward is not None:
            x = self.visible_dropout_forward(x)
        if self.visible_batch_norm is not None:
            x = self.visible_batch_norm(x)

        self.__hidden_activity_arr = x

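        # Positive-phase statistics: v0^T h0, i.e. <v h^T> under the data.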
        self.__diff_weights_arr = torch.mm(
            self.__visible_activity_arr.T,
            self.__hidden_activity_arr,
        )

        # self.__visible_diff_bias_arr += torch.nansum(self.__visible_activity_arr, dim=0)
        # self.__hidden_diff_bias_arr += torch.nansum(self.__hidden_activity_arr, dim=0)

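        # Negative phase: reconstruct the visible activities from the hidden
        # activities with the same (tied) weight matrix.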
        params_dict = self.extract_learned_dict()
        weight_keys_list = [
            key for key in params_dict.keys() if "weight" in key
        ]
        weights_arr = params_dict[weight_keys_list[0]]

        self.__visible_activity_arr = torch.mm(
            self.__hidden_activity_arr,
            weights_arr,
        )
        x = self.__visible_activity_arr
        if self.hidden_activation == "identity_adjusted":
            x = x / torch.sum(torch.ones_like(x))
        elif self.hidden_activation != "identity":
            x = self.hidden_activation(x)
        if self.hidden_dropout_forward is not None:
            x = self.hidden_dropout_forward(x)
        if self.hidden_batch_norm is not None:
            x = self.hidden_batch_norm(x)
        self.__visible_activity_arr = x

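        # Second upward pass (v -> h): hidden statistics for the negative phase.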
        self.__hidden_activity_arr = self.__visible_unit(
            self.__visible_activity_arr)
        x = self.__hidden_activity_arr
        if self.visible_activation == "identity_adjusted":
            x = x / torch.sum(torch.ones_like(x))
        elif self.visible_activation != "identity":
            x = self.visible_activation(x)
        if self.visible_dropout_forward is not None:
            x = self.visible_dropout_forward(x)
        if self.visible_batch_norm is not None:
            x = self.visible_batch_norm(x)
        self.__hidden_activity_arr = x

        self.__diff_weights_arr = self.__diff_weights_arr - torch.mm(
            self.__visible_activity_arr.T,
            self.__hidden_activity_arr,
        )
        # self.__visible_diff_bias_arr -= torch.nansum(self.__visible_activity_arr, dim=0)
        # self.__hidden_diff_bias_arr -= torch.nansum(self.__hidden_activity_arr, dim=0)

        return self.__visible_activity_arr

    def regularize(self):
        '''
        Regularization.
        '''
        if len(self.__regularizatable_data_list) > 0:
            params_dict = self.extract_learned_dict()
            for regularizatable in self.__regularizatable_data_list:
                params_dict = regularizatable.regularize(params_dict)

            for k, params in params_dict.items():
                self.load_state_dict({k: params}, strict=False)

    def save_parameters(self, filename):
        '''
        Save parameters to files.

        Args:
            filename:       File name.
        '''
        torch.save(
            {
                'epoch': self.epoch,
                'model_state_dict': self.state_dict(),
                'optimizer_state_dict': self.optimizer.state_dict(),
                'loss': self.loss_arr,
            }, filename)

    def load_parameters(self, filename, ctx=None, strict=True):
        '''
        Load parameters from a file.

        Args:
            filename:       File name.
            ctx:            Device string such as `"cpu"` or `"cuda"`. If not `None`, the model is moved to this device.
            strict:         Whether to strictly enforce that the keys in `state_dict` match the keys returned by this module's `state_dict()` function. Default: `True`.
        '''
        checkpoint = torch.load(filename)
        self.load_state_dict(checkpoint['model_state_dict'], strict=strict)
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.epoch = checkpoint['epoch']
        self.__loss_list = checkpoint['loss'].tolist()
        if ctx is not None:
            self.to(ctx)
            self.__ctx = ctx

    def set_readonly(self, value):
        ''' setter '''
        raise TypeError("This is read-only.")

    def get_loss_list(self):
        ''' getter for `list` of losses in training. '''
        return self.__loss_list

    loss_list = property(get_loss_list, set_readonly)

    def get_test_loss_arr(self):
        ''' getter for `list` of losses in test. '''
        return self.__test_loss_list

    test_loss_list = property(get_test_loss_arr, set_readonly)

    def get_loss_arr(self):
        ''' getter for `np.ndarray` of train and test losses. '''
        return self.__loss_arr

    loss_arr = property(get_loss_arr, set_readonly)

    def get_feature_points_arr(self):
        ''' getter for `torch.Tensor` of feature points in the hidden layer. '''
        return self.__hidden_activity_arr

    feature_points_arr = property(get_feature_points_arr, set_readonly)

    def get_weights_arr(self):
        ''' getter for `torch.Tensor` of the weight matrix. '''
        return self.__weights_arr

    def set_weights_arr(self, value):
        ''' setter for `torch.Tensor` of the weight matrix. '''
        self.__weights_arr = value

    weights_arr = property(get_weights_arr, set_weights_arr)

    def get_visible_bias_arr(self):
        ''' getter for `torch.Tensor` of biases in the visible layer. '''
        return self.__visible_bias_arr

    def set_visible_bias_arr(self, value):
        ''' setter for `torch.Tensor` of biases in the visible layer. '''
        self.__visible_bias_arr = value

    visible_bias_arr = property(get_visible_bias_arr, set_visible_bias_arr)

    def get_hidden_bias_arr(self):
        ''' getter for `torch.Tensor` of biases in the hidden layer. '''
        return self.__hidden_bias_arr

    def set_hidden_bias_arr(self, value):
        ''' setter for `torch.Tensor` of biases in the hidden layer. '''
        self.__hidden_bias_arr = value

    hidden_bias_arr = property(get_hidden_bias_arr, set_hidden_bias_arr)

    def get_visible_activity_arr(self):
        ''' getter for `torch.Tensor` of activities in the visible layer. '''
        return self.__visible_activity_arr

    def set_visible_activity_arr(self, value):
        ''' setter for `torch.Tensor` of activities in the visible layer. '''
        self.__visible_activity_arr = value

    visible_activity_arr = property(get_visible_activity_arr,
                                    set_visible_activity_arr)

    def get_hidden_activity_arr(self):
        ''' getter for `torch.Tensor` of activities in the hidden layer. '''
        return self.__hidden_activity_arr

    def set_hidden_activity_arr(self, value):
        ''' setter for `torch.Tensor` of activities in the hidden layer. '''
        self.__hidden_activity_arr = value

    hidden_activity_arr = property(get_hidden_activity_arr,
                                   set_hidden_activity_arr)
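
# A minimal usage sketch (not part of the original listing). Any callable that
# maps (prediction, target) to a scalar loss can stand in for `ComputableLoss`
# here, e.g. `torch.nn.MSELoss()`; a full training run would additionally need
# an `IteratableData` implementation, which is assumed and not shown.
rbm = RestrictedBoltzmannMachines(
    computable_loss=torch.nn.MSELoss(),
    visible_dim=784,
    hidden_dim=64,
)
observed_arr = torch.rand(32, 784)               # batch of 32 flattened samples
reconstructed_arr = rbm.inference(observed_arr)  # one v -> h -> v pass
print(reconstructed_arr.shape)                   # torch.Size([32, 784])
print(rbm.feature_points_arr.shape)              # hidden activities: torch.Size([32, 64])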
Example #3
def main():
    # ============================= args setting =================================
    args = ArgumentsTrainVal().parse_args()

    print('***************************Arguments****************************')
    print(args)

    checkpoint = None
    resume_model = None
    resume_optimizer = None
    if args.resume_model is not None:
        checkpoint = torch.load(args.resume_model)
        if 'model' in checkpoint:
            resume_model = checkpoint['model']
        else:
            resume_model = checkpoint
        if 'optimizer' in checkpoint:
            resume_optimizer = checkpoint['optimizer']

    # ============================= model setting =================================

    model = construct_model(args, resume_model)
    print('--------------------------Model Info----------------------------')
    print(model)

    # ============================= training setting =================================

    train_iterator, validate_iterator = construct_train_dataloaders(args)

    criterion = utilities.loss.SoftMax2D(ignore_index=train_iterator.dataset.ignore_lbl)
    optimizer = SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay)

    if resume_optimizer is not None:
        optimizer.load_state_dict(resume_optimizer)

    engine_set = dict(gpu_ids=args.gpu_ids,
                      network=model,
                      criterion=criterion,
                      train_iterator=train_iterator,
                      validate_iterator=validate_iterator,
                      optimizer=optimizer)

    # learning-rate schedule milestones (in epochs)
    lr_points = []
    if args.num_classes == 100:
        lr_points = [150, 225]
    print('==> Set lr_points: {}'.format(lr_points))

    os.makedirs(args.checkpoint_save_path, exist_ok=True)

    engine_args = dict(checkpoint_iter_freq=args.checkpoint_iter_freq,
                       checkpoint_epoch_freq=args.checkpoint_epoch_freq,
                       checkpoint_save_path=args.checkpoint_save_path,
                       iter_log_freq=args.iter_log_freq,
                       environment=args.environment,
                       lr_points=lr_points,
                       num_classes=args.num_classes)

    engine = construct_engine(engine_set, **engine_args)

    engine.resume(args.maxepoch, args.resume_epoch, args.resume_iteration)
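
# Hedged invocation sketch (the script name and flag names are assumed to
# mirror the attributes of `ArgumentsTrainVal` read above):
#   python train.py --learning_rate 0.1 --momentum 0.9 --weight_decay 5e-4 \
#       --num_classes 100 --maxepoch 300 --checkpoint_save_path ./checkpoints

if __name__ == '__main__':
    main()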