Example #1
        def apiConstructor():
            # get trainable variables
            train_vars = tf.trainable_variables()
            if scope is not None:
                opt_vars = [var for var in train_vars if scope in var.name]
            else:
                opt_vars = train_vars

            lr_method = get_lr_scheduler(lr_scheduler, lr)
            global_step = tf.train.get_or_create_global_step()
            momentum = tf.train.MomentumOptimizer(lr_method, mom)
            if clip_grad is not None:
                grads = momentum.compute_gradients(loss, var_list=opt_vars)
                clipped_grads = [(tf.clip_by_value(grad, -1.0 * clip_grad,
                                                   1.0 * clip_grad), var)
                                 for grad, var in grads]
                train_op = momentum.apply_gradients(clipped_grads,
                                                    global_step=global_step)
            else:
                train_op = momentum.minimize(loss,
                                             global_step=global_step,
                                             var_list=opt_vars)

            # set output
            self.set_output('output', train_op)
            self.set_output('global_step', global_step)
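
Examples #1 and #2 both call get_lr_scheduler(lr_scheduler, lr) and feed the result to the optimizer as its learning rate. The helper itself is not part of this listing, so the sketch below is only a hypothetical TF 1.x implementation, assuming lr_scheduler is a dict with a 'type' key and that exponential decay is one supported schedule.

import tensorflow as tf

def get_lr_scheduler(lr_scheduler, lr):
    # Hypothetical sketch; the real helper is not shown in this listing.
    # An empty dict is taken to mean a constant learning rate.
    if not lr_scheduler:
        return lr
    global_step = tf.train.get_or_create_global_step()
    if lr_scheduler.get('type') == 'exponential':
        # Return a decayed learning-rate tensor the optimizer can consume directly.
        return tf.train.exponential_decay(
            learning_rate=lr,
            global_step=global_step,
            decay_steps=lr_scheduler.get('decay_steps', 1000),
            decay_rate=lr_scheduler.get('decay_rate', 0.96),
            staircase=lr_scheduler.get('staircase', True))
    # Unknown scheduler type: fall back to the constant base rate.
    return lr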
Example #2
        def apiConstructor():
            # get trainable variables
            train_vars = tf.trainable_variables()
            if scope is not None:
                opt_vars = [var for var in train_vars if scope in var.name]
            else:
                opt_vars = train_vars

            lr_method = get_lr_scheduler(lr_scheduler, lr)
            global_step = tf.train.get_or_create_global_step()
            adam = tf.train.AdamOptimizer(learning_rate=lr_method,
                                          beta1=beta1,
                                          beta2=beta2,
                                          epsilon=eps)
            if clip_grad is not None:
                grads = adam.compute_gradients(loss, var_list=opt_vars)
                clipped_grads = [(tf.clip_by_value(grad, -1.0 * clip_grad,
                                                   1.0 * clip_grad), var)
                                 for grad, var in grads]
                train_op = adam.apply_gradients(clipped_grads,
                                                global_step=global_step)
            else:
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                with tf.control_dependencies(update_ops):
                    train_op = adam.minimize(loss,
                                             global_step=global_step,
                                             var_list=opt_vars)

            # set output
            self.set_output('output', train_op)
            self.set_output('global_step', global_step)
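
In Example #2 the UPDATE_OPS control dependency (needed, for example, for batch-norm moving averages) is attached only in the non-clipping branch, and compute_gradients returns None for variables that do not contribute to the loss, which would break the clipping list comprehension. The sketch below is a hedged variation, not the original code, that guards both cases.

import tensorflow as tf

def clipped_train_op(optimizer, loss, var_list, clip_value, global_step):
    # Hedged variation on Example #2's clipping branch (an assumption, not the
    # original code): skip None gradients and keep UPDATE_OPS in the graph.
    grads = optimizer.compute_gradients(loss, var_list=var_list)
    clipped = [(tf.clip_by_value(g, -clip_value, clip_value), v)
               for g, v in grads if g is not None]
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        return optimizer.apply_gradients(clipped, global_step=global_step)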
Example #3
    def compile_time_operation(self, learning_option, cluster):
        """
        define RMSProp optimizer given loss and weights/biases
        """
        # get attr
        # required field
        # fetch the raw attribute first: float(None) would raise a TypeError
        # before the intended error message could be produced
        lr = self.get_attr('lr', default=None)
        if lr is None:
            raise Exception(
                '[DLMDL ERROR]: {0} in {1} layer must be declared.'.format(
                    'lr', self.name))
        lr = float(lr)

        # optional field
        decay = float(self.get_attr('decay', default=0.9))
        mom = float(self.get_attr('mom', default=0.0))
        lr_scheduler = self.get_attr('lr_scheduler',
                                     default={})  # default will be set later
        lr_dic = get_lr_scheduler(lr_scheduler)
        opt_dic = {
            'type': 'RMSProp',
            'base_lr': lr,
            'momentum': mom,
            'rms_decay': decay
        }

        # setting to learning option
        learning_option['opt_dic'] = opt_dic
        learning_option['lr_sched_dic'] = lr_dic
Example #4
    def compile_time_operation(self, learning_option, cluster):
        """
        define adadelta optimizer given loss and weights/biases
        refer: ADADELTA: An Adaptive Learning Rate Method
        """
        # get attr
        # required field
        lr = self.get_attr('lr', default=None)
        if lr is None:
            raise Exception(
                '[DLMDL ERROR]: {0} in {1} layer must be declared.'.format(
                    'lr', self.name))
        lr = float(lr)

        # optional field
        rho = float(self.get_attr('rho', default=0.95))
        eps = float(self.get_attr('epsilon', default=10**-8))
        lr_scheduler = self.get_attr('lr_scheduler',
                                     default={})  # default will be set later
        lr_dic = get_lr_scheduler(lr_scheduler)
        opt_dic = {
            'type': 'AdaDelta',
            'base_lr': lr,
            'momentum': rho,
            'delta': eps
        }

        # setting to learning option
        learning_option['opt_dic'] = opt_dic
        learning_option['lr_sched_dic'] = lr_dic
Example #5
    def compile_time_operation(self, learning_option, cluster):
        """
        define adam optimizer given loss and weights/biases
        """
        # get attr
        # required field
        lr = self.get_attr('lr', default=None)
        if lr is None:
            raise Exception(
                '[DLMDL ERROR]: {0} in {1} layer must be declared.'.format(
                    'lr', self.name))
        lr = float(lr)

        # optional field
        beta1 = float(self.get_attr('beta1', default=0.9))
        beta2 = float(self.get_attr('beta2', default=0.999))
        eps = float(self.get_attr('epsilon', default=10**-8))
        lr_scheduler = self.get_attr('lr_scheduler',
                                     default={})  # default will be set later
        lr_dic = get_lr_scheduler(lr_scheduler)
        opt_dic = {
            'type': 'Adam',
            'base_lr': lr,
            'momentum': beta1,
            'momentum2': beta2,
            'delta': eps
        }

        # setting to learning option
        learning_option['opt_dic'] = opt_dic
        learning_option['lr_sched_dic'] = lr_dic
Example #6
    def compile_time_operation(self, learning_option, cluster):
        """
        define stochastic gradient descent (SGD) optimizer given loss and weights/biases
        """
        # get attr
        # required field
        lr = self.get_attr('lr', default=None)
        if lr is None:
            raise Exception(
                '[DLMDL ERROR]: {0} in {1} layer must be declared.'.format(
                    'lr', self.name))
        lr = float(lr)

        # optional field
        lr_scheduler = self.get_attr('lr_scheduler',
                                     default={})  # default will be set later
        lr_dic = get_lr_scheduler(lr_scheduler)
        opt_dic = {'type': 'SGD', 'base_lr': lr}

        # setting to learning option
        learning_option['opt_dic'] = opt_dic
        learning_option['lr_sched_dic'] = lr_dic
Example #7
    def compile_time_operation(self, learning_option, cluster):
        """
        define adagrad optimizer given loss and weights/biases
        refer: Adaptive Subgradient Methods for Online Learning and Stochastic Optimization, Journal of Machine Learning Research 2011
        """
        # get attr
        # required field
        lr = self.get_attr('lr', default=None)
        if lr is None:
            raise Exception(
                '[DLMDL ERROR]: {0} in {1} layer must be declared.'.format(
                    'lr', self.name))
        lr = float(lr)

        # optional field
        lr_scheduler = self.get_attr('lr_scheduler',
                                     default={})  # default will be set later
        lr_dic = get_lr_scheduler(lr_scheduler)
        opt_dic = {'type': 'AdaGrad', 'base_lr': lr}

        # setting to learning option
        learning_option['opt_dic'] = opt_dic
        learning_option['lr_sched_dic'] = lr_dic
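
Examples #3 through #7 only populate a solver-parameter dictionary (the key names mirror Caffe's solver fields) inside learning_option; the code that later turns opt_dic into a concrete optimizer is not part of this listing. The following is a hypothetical TF 1.x consumer, assuming exactly the key names used above.

import tensorflow as tf

def build_tf_optimizer(opt_dic):
    # Hypothetical consumer of the opt_dic built in Examples #3-#7; the real
    # DLMDL runtime is not shown here, so this mapping is an assumption.
    opt_type = opt_dic['type']
    lr = opt_dic['base_lr']
    if opt_type == 'SGD':
        return tf.train.GradientDescentOptimizer(lr)
    if opt_type == 'AdaGrad':
        return tf.train.AdagradOptimizer(lr)
    if opt_type == 'RMSProp':
        return tf.train.RMSPropOptimizer(lr, decay=opt_dic['rms_decay'],
                                         momentum=opt_dic['momentum'])
    if opt_type == 'AdaDelta':
        return tf.train.AdadeltaOptimizer(lr, rho=opt_dic['momentum'],
                                          epsilon=opt_dic['delta'])
    if opt_type == 'Adam':
        return tf.train.AdamOptimizer(lr, beta1=opt_dic['momentum'],
                                      beta2=opt_dic['momentum2'],
                                      epsilon=opt_dic['delta'])
    raise ValueError('unknown optimizer type: {}'.format(opt_type))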
Example #8
def create_lr_scheduler(
        args: argparse.Namespace, resume_training: bool,
        training_state_dir: str) -> lr_scheduler.LearningRateScheduler:
    """
    Create the learning rate scheduler.

    :param args: Arguments as returned by argparse.
    :param resume_training: When True, the scheduler will be loaded from disk.
    :param training_state_dir: Directory where the training state is stored.
    :return: The learning rate scheduler.
    """
    learning_rate_half_life = none_if_negative(args.learning_rate_half_life)
    # TODO: The loading for continuation of the scheduler is done separately from the other parts
    if not resume_training:
        lr_scheduler_instance = lr_scheduler.get_lr_scheduler(
            args.learning_rate_scheduler_type, args.checkpoint_frequency,
            learning_rate_half_life, args.learning_rate_reduce_factor,
            args.learning_rate_reduce_num_not_improved,
            args.learning_rate_schedule, args.learning_rate_warmup)
    else:
        with open(os.path.join(training_state_dir, C.SCHEDULER_STATE_NAME),
                  "rb") as fp:
            lr_scheduler_instance = pickle.load(fp)
    return lr_scheduler_instance
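
The resume branch above unpickles the scheduler from C.SCHEDULER_STATE_NAME inside the training-state directory. The matching save step is not shown in this listing; a sketch of what it would presumably look like is given below, with the file name passed in explicitly because the value of the constant is not visible here.

import os
import pickle

def save_lr_scheduler(lr_scheduler_instance, training_state_dir, state_name):
    # Hypothetical counterpart to the load in Example #8: persist the scheduler
    # so a resumed run can pick it up again. The function name and the explicit
    # state_name argument are assumptions, not code from the listing.
    with open(os.path.join(training_state_dir, state_name), "wb") as fp:
        pickle.dump(lr_scheduler_instance, fp)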
Example #9
    def __init__(self, args):
        super(Trainer, self).__init__()
        self.args = args
        self.experiment_dir = args.experiment_dir
        if not osp.exists(self.experiment_dir):
            os.makedirs(self.experiment_dir)
            print("The experiment dir has been created:{}".format(
                self.experiment_dir))
        self.trainer_log = TrainerLog(args=args, append=True)
        self.ctx = set_ctx(args=args)
        self.check_point = CheckPoint(args=args,
                                      trainer_log=self.trainer_log,
                                      ctx=self.ctx)
        self.train_loader, self.test_loader = dataloader(args=args)
        self.lr_scheduler = None
        self.optimizer = None
        self.model = None
        if self.train_loader is not None:
            self.train_samples_num = len(self.train_loader._dataset)
            print("train dataset samples: {}".format(self.train_samples_num))
        self.test_samples_num = len(self.test_loader._dataset)
        print("test dataset samples: {}".format(self.test_samples_num))
        self.resume_epoch = 0
        if args.only_test is False:
            if args.use_tensorboard is True:
                from tensorboardX import SummaryWriter
                self.tb_writer = SummaryWriter(
                    log_dir=osp.join(args.experiment_dir, 'tensorboard'))
            else:
                self.tb_writer = None
            if args.resume is True:
                self.checkpoint_epoch = args.checkpoint_epoch
                self.model = get_networks(args=args, ctx=self.ctx)
                self.resume_epoch = self.check_point.load_checkpoint_parameters(
                    epoch=self.checkpoint_epoch, model=self.model)
            else:
                self.model = get_networks(args=args, ctx=self.ctx)
                self.model.classifier.initialize(ctx=self.ctx)

            self.lr_scheduler = get_lr_scheduler(
                args=args, train_loader=self.train_loader)
            self.optimizer, self.trainer = set_optimizer(
                model=self.model, lr_scheduler=self.lr_scheduler, args=args)
            self.loss_functions = set_loss(args=args, tb_writer=self.tb_writer)
            self.current_epoch = None
        elif args.only_test is True:
            self.checkpoint_epoch = args.checkpoint_epoch
            self.model = get_networks(args=args, ctx=self.ctx)
            self.epoch_test = args.epoch_test
            _ = self.check_point.load_checkpoint_parameters(
                epoch=self.checkpoint_epoch,
                model=self.model,
                epoch_test=self.epoch_test)
        if self.lr_scheduler is not None:
            self.trainer_log.print_use_lr_scheduler()
        if self.optimizer is not None and self.trainer is not None:
            self.trainer_log.print_use_optimizer()
        if self.model is not None:
            self.trainer_log.print_use_network()
        self.test_accuracy_metric = metric.Accuracy()
        self.epochs = args.epochs
        self.train_total = 0
        self.best_accuracy = None
        self.current_accuracy = None
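
Example #9 only prepares the model, gluon.Trainer, loss functions, and learning-rate scheduler; the training loop that uses them is not shown. The following is a minimal per-epoch sketch, written as if it were a method of the class above, under stated assumptions: self.ctx is a single context, self.loss_functions is a single callable, and the method name is chosen here for illustration.

from mxnet import autograd

def train_one_epoch(self, epoch):
    # Hypothetical sketch of how the attributes prepared in __init__ could be
    # used; the real training method is not part of this listing.
    for data, label in self.train_loader:
        data = data.as_in_context(self.ctx)        # assumes a single context
        label = label.as_in_context(self.ctx)
        with autograd.record():
            output = self.model(data)
            loss = self.loss_functions(output, label)  # assumes one loss fn
        loss.backward()
        self.trainer.step(data.shape[0])           # step by the batch size
    self.current_epoch = epoch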