Example #1
    def __init__(self, model, **kwargs):
        super().__init__()
        if type(model) is dict:
            model = create_model_for('classify', **model)
        self.clip = float(kwargs.get('clip', 5))
        self.labels = model.labels
        self.gpus = int(kwargs.get('gpus', 1))
        if self.gpus == -1:
            self.gpus = len(
                os.getenv('CUDA_VISIBLE_DEVICES', os.getenv('NV_GPU',
                                                            '0')).split(','))

        self.optimizer = OptimizerManager(model, **kwargs)
        self.model = model
        if self.gpus > 0 and self.model.gpu:
            self.crit = model.create_loss().cuda()
            if self.gpus > 1:
                self.model = torch.nn.DataParallel(model).cuda()
            else:
                self.model.cuda()
        else:
            logger.warning("Requested training on CPU.  This will be slow.")
            self.crit = model.create_loss()
            self.model = model
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
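
In Example #1, `gpus=-1` means "use every visible device"; the count is derived from the environment. A minimal standalone sketch of that lookup, using only the standard library and the same fallback chain (`CUDA_VISIBLE_DEVICES`, then `NV_GPU`, then a single device):

import os

def visible_gpu_count(default='0'):
    # Same fallback chain as the trainer above.
    devices = os.getenv('CUDA_VISIBLE_DEVICES', os.getenv('NV_GPU', default))
    return len(devices.split(','))

print(visible_gpu_count())  # e.g. 2 when CUDA_VISIBLE_DEVICES="0,1"
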
Example #2
    def __init__(self, model_params, **kwargs):
        """Create a Trainer, and give it the parameters needed to instantiate the model

        :param model_params: The model parameters
        :param kwargs: See below

        :Keyword Arguments:

          * *nsteps* (`int`) -- If we should report every n-steps, this should be passed
          * *ema_decay* (`float`) -- If we are doing an exponential moving average, what decay to use
          * *clip* (`int`) -- If we are doing gradient clipping, what value to use
          * *optim* (`str`) -- The name of the optimizer we are using
          * *lr* (`float`) -- The learning rate we are using
          * *mom* (`float`) -- If we are using SGD, what value to use for momentum
          * *beta1* (`float`) -- Adam-specific hyper-param, defaults to `0.9`
          * *beta2* (`float`) -- Adam-specific hyper-param, defaults to `0.999`
          * *epsilon* (`float`) -- Adam-specific hyper-param, defaults to `1e-8`

        """
        super().__init__()
        if type(model_params) is dict:
            self.model = create_model_for('tagger', **model_params)
        else:
            self.model = model_params
        span_type = kwargs.get('span_type', 'iob')
        verbose = kwargs.get('verbose', False)
        self.evaluator = TaggerEvaluatorEagerTf(self.model, span_type, verbose)
        self.optimizer = EagerOptimizer(loss, **kwargs)
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
        checkpoint_dir = kwargs.get('checkpoint')
        if checkpoint_dir is None:
            checkpoint_dir = f'./tf-tagger-{os.getpid()}'
        self._checkpoint, self.checkpoint_manager = setup_tf2_checkpoints(
            self.optimizer, self.model, checkpoint_dir)
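
`setup_tf2_checkpoints` is not shown in these excerpts. As an assumption about what such a helper does, here is a minimal sketch built on TF2's checkpoint API; the function name and the resume-if-present behaviour are inferred, not confirmed by the source:

import tensorflow as tf

def setup_tf2_checkpoints_sketch(optimizer, model, checkpoint_dir, max_to_keep=5):
    # Track the optimizer state and model weights in a single checkpoint object.
    checkpoint = tf.train.Checkpoint(optimizer=optimizer.optimizer, model=model)
    manager = tf.train.CheckpointManager(checkpoint,
                                         directory=checkpoint_dir,
                                         max_to_keep=max_to_keep)
    # Resume from the most recent checkpoint if one exists.
    if manager.latest_checkpoint:
        checkpoint.restore(manager.latest_checkpoint)
    return checkpoint, manager
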
Example #3
    def __init__(self, model, **kwargs):
        super().__init__()
        if type(model) is dict:
            model = create_model_for('seq2seq', **model)

        self.clip = float(kwargs.get('clip', 5))
        self.model = model
        self.optimizer = OptimizerManager(self.model, **kwargs)
        self._input = model.make_input
        self._predict = model.predict
        self.tgt_rlut = kwargs['tgt_rlut']
        self.gpus = kwargs.get('gpus', 1)
        self.bleu_n_grams = int(kwargs.get("bleu_n_grams", 4))
        self.label_smoothing = kwargs.get("label_smoothing")

        if self.gpus > 0:
            self.crit = model.create_loss(label_smooth=self.label_smoothing).cuda()
            if self.gpus > 1:
                self.model = torch.nn.DataParallel(model).cuda()
            else:
                self.model.cuda()
        else:
            logger.warning("Requested training on CPU.  This will be slow.")
            self.crit = model.create_loss()

        self.nsteps = kwargs.get('nsteps', 500)
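
`create_loss(label_smooth=...)` in Example #3 lives on the model and is not shown here. If you only need the effect, recent PyTorch (1.10+) exposes label smoothing directly on the built-in criterion; a rough stand-in with hypothetical values:

import torch
import torch.nn as nn

# Hypothetical smoothing factor; the real value comes from kwargs["label_smoothing"].
criterion = nn.CrossEntropyLoss(label_smoothing=0.1, ignore_index=0)
logits = torch.randn(8, 100)           # (batch, vocab)
targets = torch.randint(1, 100, (8,))  # gold token ids
loss = criterion(logits, targets)
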
Example #4
    def __init__(self, model_params, **kwargs):
        """Create a Trainer, and give it the parameters needed to instantiate the model

        :param model_params: The model parameters
        :param kwargs: See below

        :Keyword Arguments:

          * *nsteps* (`int`) -- If we should report every n-steps, this should be passed
          * *ema_decay* (`float`) -- If we are doing an exponential moving average, what decay to use
          * *clip* (`int`) -- If we are doing gradient clipping, what value to use
          * *optim* (`str`) -- The name of the optimizer we are using
          * *lr* (`float`) -- The learning rate we are using
          * *mom* (`float`) -- If we are using SGD, what value to use for momentum
          * *beta1* (`float`) -- Adam-specific hyper-param, defaults to `0.9`
          * *beta2* (`float`) -- Adam-specific hyper-param, defaults to `0.999`
          * *epsilon* (`float`) -- Adam-specific hyper-param, defaults to `1e-8`

        """
        super().__init__()

        if type(model_params) is dict:
            self.model = create_model_for('classify', **model_params)
        else:
            self.model = model_params

        self.optimizer = EagerOptimizer(loss, **kwargs)
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
        self._checkpoint = tf.train.Checkpoint(
            optimizer=self.optimizer.optimizer, model=self.model)
        checkpoint_dir = '{}-{}'.format("./tf-classify", os.getpid())

        self.checkpoint_manager = tf.train.CheckpointManager(
            self._checkpoint, directory=checkpoint_dir, max_to_keep=5)
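
`EagerOptimizer(loss, **kwargs)` wraps the update step and is not defined in these excerpts. A minimal sketch of the eager pattern it presumably implements, using `tf.GradientTape` and a plain Keras optimizer (the names here are illustrative, not the library's API):

import tensorflow as tf

def eager_step(model, loss_fn, optimizer, x, y):
    # One forward/backward pass in eager mode.
    with tf.GradientTape() as tape:
        loss_value = loss_fn(model, x, y)
    grads = tape.gradient(loss_value, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss_value
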
Example #5
    def __init__(self, model, **kwargs):
        super().__init__()
        if type(model) is dict:
            checkpoint = kwargs.get('checkpoint')
            if checkpoint:
                model['checkpoint'] = checkpoint
            model = create_model_for('tagger', **model)
        self.grad_accum = int(kwargs.get('grad_accum', 1))
        self.gpus = int(kwargs.get('gpus', 1))
        # By default support IOB1/IOB2
        self.span_type = kwargs.get('span_type', 'iob')
        self.verbose = kwargs.get('verbose', False)

        logger.info('Setting span type %s', self.span_type)
        self.model = model
        self.idx2label = revlut(self.model.labels)
        self.clip = float(kwargs.get('clip', 5))
        self.optimizer = OptimizerManager(self.model, **kwargs)
        if self.gpus > 1:
            logger.info(
                "Trainer for PyTorch tagger currently doesnt support multiple GPUs.  Setting to 1"
            )
            self.gpus = 1
        if self.gpus > 0 and self.model.gpu:
            self.model = model.cuda()
        else:
            logger.warning("Requested training on CPU.  This will be slow.")

        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
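
Example #5 reads `grad_accum` and `clip`, but the loop that uses them is elsewhere in the trainer. A hedged sketch of the usual pattern, with illustrative names: scale the loss by the accumulation factor, clip, and step every `grad_accum` batches:

import torch

def accumulate_and_step(model, criterion, optimizer, batches, grad_accum=2, clip=5.0):
    optimizer.zero_grad()
    for i, (x, y) in enumerate(batches):
        # Average the loss over the accumulation window.
        loss = criterion(model(x), y) / grad_accum
        loss.backward()
        if (i + 1) % grad_accum == 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
            optimizer.step()
            optimizer.zero_grad()
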
Example #6
    def __init__(self, model_params, **kwargs):
        """Create a Trainer, and give it the parameters needed to instantiate the model

        :param model_params: The model parameters
        :param kwargs: See below

        :Keyword Arguments:

          * *nsteps* (`int`) -- If we should report every n-steps, this should be passed
          * *ema_decay* (`float`) -- If we are doing an exponential moving average, what decay to use
          * *clip* (`int`) -- If we are doing gradient clipping, what value to use
          * *optim* (`str`) -- The name of the optimizer we are using
          * *lr* (`float`) -- The learning rate we are using
          * *mom* (`float`) -- If we are using SGD, what value to use for momentum
          * *beta1* (`float`) -- Adam-specific hyper-param, defaults to `0.9`
          * *beta2* (`float`) -- Adam-specific hyper-param, defaults to `0.999`
          * *epsilon* (`float`) -- Adam-specific hyper-param, defaults to `1e-8`

        """
        super().__init__()
        if type(model_params) is dict:
            self.model = create_model_for('classify', **model_params)
        else:
            self.model = model_params
        self.sess = self.model.sess
        self.loss = self.model.create_loss()
        self.test_loss = self.model.create_test_loss()
        self.global_step, train_op = optimizer(
            self.loss,
            colocate_gradients_with_ops=True,
            variables=self.model.trainable_variables,
            **kwargs)
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
        decay = kwargs.get('ema_decay', None)
        if decay is not None:
            self.ema = True
            ema_op, self.ema_load, self.ema_restore = _add_ema(
                self.model, float(decay))
            with tf.compat.v1.control_dependencies([ema_op]):
                self.train_op = tf.identity(train_op)
        else:
            self.ema = False
            self.train_op = train_op

        tables = tf.compat.v1.tables_initializer()
        self.model.sess.run(tables)
        self.model.sess.run(tf.compat.v1.global_variables_initializer())
        self.model.set_saver(tf.compat.v1.train.Saver())
        checkpoint = kwargs.get('checkpoint')
        if checkpoint is not None:
            skip_blocks = kwargs.get('blocks_to_skip', ['OptimizeLoss'])
            reload_checkpoint(self.model.sess, checkpoint, skip_blocks)
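
`_add_ema` in Example #6 is not shown. As an assumption, it presumably builds on `tf.compat.v1.train.ExponentialMovingAverage`; a minimal sketch of that pattern (the load/restore ops returned by the real helper are omitted):

import tensorflow as tf

def add_ema_sketch(variables, decay=0.999):
    # Maintain shadow (exponentially averaged) copies of the trainable variables.
    ema = tf.compat.v1.train.ExponentialMovingAverage(decay=decay)
    ema_op = ema.apply(variables)
    return ema, ema_op

# Usage mirrors the trainer above: run the EMA update as part of every train step.
#     with tf.compat.v1.control_dependencies([ema_op]):
#         train_op = tf.identity(raw_train_op)
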
Example #7
    def __init__(self, model_params, **kwargs):
        """Create a Trainer, and give it the parameters needed to instantiate the model

        :param model_params: The model parameters
        :param kwargs: See below

        :Keyword Arguments:

          * *nsteps* (`int`) -- If we should report every n-steps, this should be passed
          * *ema_decay* (`float`) -- If we are doing an exponential moving average, what decay to use
          * *clip* (`int`) -- If we are doing gradient clipping, what value to use
          * *optim* (`str`) -- The name of the optimizer we are using
          * *lr* (`float`) -- The learning rate we are using
          * *mom* (`float`) -- If we are using SGD, what value to use for momentum
          * *beta1* (`float`) -- Adam-specific hyper-param, defaults to `0.9`
          * *beta2* (`float`) -- Adam-specific hyper-param, defaults to `0.999`
          * *epsilon* (`float`) -- Adam-specific hyper-param, defaults to `1e-8`
          * *tgt_rlut* (`dict`) -- This is a dictionary that converts from ints back to strings, used for predictions
          * *beam* (`int`) -- The beam size to use at prediction time, defaults to `10`

        """
        super().__init__()
        if type(model_params) is dict:
            self.model = create_model_for('seq2seq', **model_params)
        else:
            self.model = model_params
        self.sess = self.model.sess
        self.loss = self.model.create_loss()
        self.test_loss = self.model.create_test_loss()
        self.tgt_rlut = kwargs['tgt_rlut']
        self.base_dir = kwargs['basedir']
        self.global_step, self.train_op = optimizer(
            self.loss,
            colocate_gradients_with_ops=True,
            variables=self.model.trainable_variables,
            **kwargs)
        self.nsteps = kwargs.get('nsteps', 500)
        self.beam = kwargs.get('beam', 10)
        tables = tf.compat.v1.tables_initializer()
        self.model.sess.run(tables)
        self.model.sess.run(tf.compat.v1.global_variables_initializer())
        self.model.set_saver(tf.compat.v1.train.Saver())
        self.bleu_n_grams = int(kwargs.get("bleu_n_grams", 4))

        checkpoint = kwargs.get('checkpoint')
        if checkpoint is not None:
            skip_blocks = kwargs.get('blocks_to_skip', ['OptimizeLoss'])
            reload_checkpoint(self.model.sess, checkpoint, skip_blocks)
Example #8
    def __init__(self, model_params, **kwargs):
        super().__init__()

        if type(model_params) is dict:
            self.model = create_model_for('lm', **model_params)
        else:
            self.model = model_params

        loss_fn = loss_with_state if self.model.requires_state else loss_without_state
        self.optimizer = EagerOptimizer(loss_fn, **kwargs)
        self.nsteps = kwargs.get('nsteps', 500)
        self._checkpoint = tf.train.Checkpoint(
            optimizer=self.optimizer.optimizer, model=self.model)
        checkpoint_dir = '{}-{}'.format("./tf-lm", os.getpid())

        self.checkpoint_manager = tf.train.CheckpointManager(
            self._checkpoint, directory=checkpoint_dir, max_to_keep=5)
Example #9
    def __init__(self, model_params, **kwargs):
        """Create a Trainer, and give it the parameters needed to instantiate the model

        :param model_params: The model parameters
        :param kwargs: See below

        :Keyword Arguments:

          * *nsteps* (`int`) -- If we should report every n-steps, this should be passed
          * *ema_decay* (`float`) -- If we are doing an exponential moving average, what decay to use
          * *clip* (`int`) -- If we are doing gradient clipping, what value to use
          * *optim* (`str`) -- The name of the optimizer we are using
          * *lr* (`float`) -- The learning rate we are using
          * *mom* (`float`) -- If we are using SGD, what value to use for momentum
          * *beta1* (`float`) -- Adam-specific hyper-param, defaults to `0.9`
          * *beta2* (`float`) -- Adam-specific hyper-param, defaults to `0.999`
          * *epsilon* (`float`) -- Adam-specific hyper-param, defaults to `1e-8`

        """
        super().__init__()
        if type(model_params) is dict:
            self.model = create_model_for('tagger', **model_params)
        else:
            self.model = model_params
        self.sess = self.model.sess
        self.loss = self.model.create_loss()
        span_type = kwargs.get('span_type', 'iob')
        verbose = kwargs.get('verbose', False)
        self.evaluator = TaggerEvaluatorTf(self.model, span_type, verbose)
        self.global_step, self.train_op = optimizer(
            self.loss,
            colocate_gradients_with_ops=True,
            variables=self.model.trainable_variables,
            **kwargs)
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
        tables = tf.compat.v1.tables_initializer()
        self.model.sess.run(tables)
        init = tf.compat.v1.global_variables_initializer()
        self.model.sess.run(init)
        saver = tf.compat.v1.train.Saver()
        self.model.save_using(saver)
        checkpoint = kwargs.get('checkpoint')
        if checkpoint is not None:
            skip_blocks = kwargs.get('blocks_to_skip', ['OptimizeLoss'])
            reload_checkpoint(self.model.sess, checkpoint, skip_blocks)
Example #10
    def __init__(self, model, **kwargs):
        super().__init__()
        if type(model) is dict:
            model = create_model_for('lm', **model)
        self.model = model
        self.clip = float(kwargs.get('clip', 5))
        self.gpus = kwargs.get('gpus', 1)
        if self.gpus > 0:
            self.crit = model.create_loss().cuda()
            if self.gpus > 1:
                self.model = torch.nn.DataParallel(model).cuda()
            else:
                self.model.cuda()
        else:
            logger.warning("Requested training on CPU.  This will be slow.")
            self.crit = model.create_loss()

        self.nsteps = kwargs.get('nsteps', 500)
        self.optimizer = OptimizerManager(self.model, **kwargs)
Example #11
    def __init__(self, model_params, **kwargs):
        super().__init__()

        if type(model_params) is dict:
            self.model = create_model_for('seq2seq', **model_params)
        else:
            self.model = model_params

        self.tgt_rlut = kwargs['tgt_rlut']
        self.loss = Seq2SeqLoss(**kwargs)
        self.optimizer = EagerOptimizer(self.loss, **kwargs)
        self.nsteps = kwargs.get('nsteps', 500)
        self._checkpoint = tf.train.Checkpoint(
            optimizer=self.optimizer.optimizer, model=self.model)
        checkpoint_dir = '{}-{}'.format("./tf-seq2seq", os.getpid())
        self.bleu_n_grams = int(kwargs.get("bleu_n_grams", 4))

        self.checkpoint_manager = tf.train.CheckpointManager(
            self._checkpoint, directory=checkpoint_dir, max_to_keep=5)
Example #12
    def __init__(self, model_params, **kwargs):
        super().__init__()

        if type(model_params) is dict:
            self.model = create_model_for('seq2seq', **model_params)
        else:
            self.model = model_params

        self.tgt_rlut = kwargs['tgt_rlut']
        self.loss = Seq2SeqLoss(**kwargs)
        self.optimizer = EagerOptimizer(self.loss, **kwargs)
        self.nsteps = kwargs.get('nsteps', 500)

        checkpoint_dir = kwargs.get('checkpoint')
        if checkpoint_dir is None:
            checkpoint_dir = f'./tf-seq2seq-{os.getpid()}'
        self._checkpoint, self.checkpoint_manager = setup_tf2_checkpoints(
            self.optimizer, self.model, checkpoint_dir)

        self.bleu_n_grams = int(kwargs.get("bleu_n_grams", 4))
Example #13
    def __init__(self, model_params, **kwargs):
        super().__init__()
        if type(model_params) is dict:
            self.model = create_model_for('seq2seq', **model_params)
        else:
            self.model = model_params

        self.tgt_rlut = kwargs['tgt_rlut']
        self.optimizer = EagerOptimizer(loss, **kwargs)
        self.nsteps = kwargs.get('nsteps', 500)
        self._checkpoint = tf.train.Checkpoint(
            optimizer=self.optimizer.optimizer, model=self.model)
        checkpoint_dir = '{}-{}'.format("./tf-seq2seq", os.getpid())

        self.checkpoint_manager = tf.train.CheckpointManager(
            self._checkpoint, directory=checkpoint_dir, max_to_keep=5)
        strategy_type = kwargs.get('strategy_type', 'mirror')
        gpus = int(kwargs.get('gpus', 1))
        endpoint = kwargs.get('endpoint')
        self.strategy = create_distribute_strategy(strategy_type, gpus,
                                                   endpoint)
        self.bleu_n_grams = int(kwargs.get("bleu_n_grams", 4))
Example #14
    def __init__(self, model_params, **kwargs):
        super().__init__()
        if type(model_params) is dict:
            self.model = create_model_for('lm', **model_params)
        else:
            self.model = model_params
        self.sess = self.model.sess
        self.loss = self.model.create_loss()
        self.test_loss = self.model.create_test_loss()
        self.global_step, self.train_op = optimizer(
            self.loss,
            colocate_gradients_with_ops=True,
            variables=self.model.trainable_variables,
            **kwargs)
        self.nsteps = kwargs.get('nsteps', 500)
        init = tf.compat.v1.global_variables_initializer()
        self.model.sess.run(init)
        saver = tf.compat.v1.train.Saver()
        self.model.set_saver(saver)
        checkpoint = kwargs.get('checkpoint')
        if checkpoint is not None:
            skip_blocks = kwargs.get('blocks_to_skip', ['OptimizeLoss'])
            reload_checkpoint(self.model.sess, checkpoint, skip_blocks)
def create_servable_embeddings_model(embeddings, **kwargs):
    return create_model_for('servable_embeddings', embeddings, None, **kwargs)