def __init__(self, model, **kwargs):
    super().__init__()
    # The flattened original repeated this dict-check twice; once is enough
    if type(model) is dict:
        model = create_model_for('classify', **model)
    self.clip = float(kwargs.get('clip', 5))
    self.labels = model.labels
    self.gpus = int(kwargs.get('gpus', 1))
    if self.gpus == -1:
        self.gpus = len(os.getenv('CUDA_VISIBLE_DEVICES', os.getenv('NV_GPU', '0')).split(','))
    self.optimizer = OptimizerManager(model, **kwargs)
    self.model = model
    if self.gpus > 0 and self.model.gpu:
        self.crit = model.create_loss().cuda()
        if self.gpus > 1:
            self.model = torch.nn.DataParallel(model).cuda()
        else:
            self.model.cuda()
    else:
        logger.warning("Requested training on CPU. This will be slow.")
        self.crit = model.create_loss()
        self.model = model
    self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
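# A minimal usage sketch for the constructor above. The enclosing class name is
# not visible in this excerpt, so `ClassifyTrainerPyTorch` is a hypothetical
# stand-in; the kwargs shown are the ones this constructor actually reads:
#
#   trainer = ClassifyTrainerPyTorch(
#       model,        # an instantiated classify model, or a dict of model params
#       clip=5.0,     # value used for gradient clipping
#       gpus=1,       # -1 infers the count from CUDA_VISIBLE_DEVICES / NV_GPU
#       nsteps=100,   # report metrics every 100 steps
#   )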
def __init__(self, model_params, **kwargs): """Create a Trainer, and give it the parameters needed to instantiate the model :param model_params: The model parameters :param kwargs: See below :Keyword Arguments: * *nsteps* (`int`) -- If we should report every n-steps, this should be passed * *ema_decay* (`float`) -- If we are doing an exponential moving average, what decay to us4e * *clip* (`int`) -- If we are doing gradient clipping, what value to use * *optim* (`str`) -- The name of the optimizer we are using * *lr* (`float`) -- The learning rate we are using * *mom* (`float`) -- If we are using SGD, what value to use for momentum * *beta1* (`float`) -- Adam-specific hyper-param, defaults to `0.9` * *beta2* (`float`) -- Adam-specific hyper-param, defaults to `0.999` * *epsilon* (`float`) -- Adam-specific hyper-param, defaults to `1e-8 """ super().__init__() if type(model_params) is dict: self.model = create_model_for('tagger', **model_params) else: self.model = model_params span_type = kwargs.get('span_type', 'iob') verbose = kwargs.get('verbose', False) self.evaluator = TaggerEvaluatorEagerTf(self.model, span_type, verbose) self.optimizer = EagerOptimizer(loss, **kwargs) self.nsteps = kwargs.get('nsteps', six.MAXSIZE) checkpoint_dir = kwargs.get('checkpoint') if checkpoint_dir is None: checkpoint_dir = f'./tf-tagger-{os.getpid()}' self._checkpoint, self.checkpoint_manager = setup_tf2_checkpoints( self.optimizer, self.model, checkpoint_dir)
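# Note: `loss` here is a module-level tagger loss function defined elsewhere in
# this file, and `setup_tf2_checkpoints` (also defined elsewhere) bundles the
# tf.train.Checkpoint / tf.train.CheckpointManager wiring that other trainers
# in this file do by hand; it presumably also restores the latest checkpoint
# from `checkpoint_dir` when one exists.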
def __init__(self, model, **kwargs):
    super().__init__()
    if type(model) is dict:
        model = create_model_for('seq2seq', **model)
    self.clip = float(kwargs.get('clip', 5))
    self.model = model
    self.optimizer = OptimizerManager(self.model, **kwargs)
    self._input = model.make_input
    self._predict = model.predict
    self.tgt_rlut = kwargs['tgt_rlut']
    self.gpus = kwargs.get('gpus', 1)
    self.bleu_n_grams = int(kwargs.get("bleu_n_grams", 4))
    self.label_smoothing = kwargs.get("label_smoothing")
    if self.gpus > 0:
        self.crit = model.create_loss(label_smooth=self.label_smoothing).cuda()
        if self.gpus > 1:
            self.model = torch.nn.DataParallel(model).cuda()
        else:
            self.model.cuda()
    else:
        logger.warning("Requested training on CPU. This will be slow.")
        self.crit = model.create_loss()
    self.nsteps = kwargs.get('nsteps', 500)
def __init__(self, model_params, **kwargs): """Create a Trainer, and give it the parameters needed to instantiate the model :param model_params: The model parameters :param kwargs: See below :Keyword Arguments: * *nsteps* (`int`) -- If we should report every n-steps, this should be passed * *ema_decay* (`float`) -- If we are doing an exponential moving average, what decay to us4e * *clip* (`int`) -- If we are doing gradient clipping, what value to use * *optim* (`str`) -- The name of the optimizer we are using * *lr* (`float`) -- The learning rate we are using * *mom* (`float`) -- If we are using SGD, what value to use for momentum * *beta1* (`float`) -- Adam-specific hyper-param, defaults to `0.9` * *beta2* (`float`) -- Adam-specific hyper-param, defaults to `0.999` * *epsilon* (`float`) -- Adam-specific hyper-param, defaults to `1e-8 """ super().__init__() if type(model_params) is dict: self.model = create_model_for('classify', **model_params) else: self.model = model_params self.optimizer = EagerOptimizer(loss, **kwargs) self.nsteps = kwargs.get('nsteps', six.MAXSIZE) self._checkpoint = tf.train.Checkpoint( optimizer=self.optimizer.optimizer, model=self.model) checkpoint_dir = '{}-{}'.format("./tf-classify", os.getpid()) self.checkpoint_manager = tf.train.CheckpointManager( self._checkpoint, directory=checkpoint_dir, max_to_keep=5)
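# `loss` above is a module-level function defined elsewhere in this file. From
# the way EagerOptimizer invokes it, a classify version would look roughly like
# the sketch below (the reduction and the `from_logits` flag are assumptions,
# not the file's actual definition):
#
#   def loss(model, x, y):
#       y_ = model(x)
#       return tf.reduce_mean(
#           tf.keras.losses.sparse_categorical_crossentropy(y, y_, from_logits=True))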
def __init__(self, model, **kwargs):
    super().__init__()
    if type(model) is dict:
        checkpoint = kwargs.get('checkpoint')
        if checkpoint:
            model['checkpoint'] = checkpoint
        model = create_model_for('tagger', **model)
    self.grad_accum = int(kwargs.get('grad_accum', 1))
    self.gpus = int(kwargs.get('gpus', 1))
    # By default support IOB1/IOB2
    self.span_type = kwargs.get('span_type', 'iob')
    self.verbose = kwargs.get('verbose', False)
    logger.info('Setting span type %s', self.span_type)
    self.model = model
    self.idx2label = revlut(self.model.labels)
    self.clip = float(kwargs.get('clip', 5))
    self.optimizer = OptimizerManager(self.model, **kwargs)
    if self.gpus > 1:
        logger.info("Trainer for PyTorch tagger currently doesn't support multiple GPUs. Setting to 1")
        self.gpus = 1
    if self.gpus > 0 and self.model.gpu:
        self.model = model.cuda()
    else:
        logger.warning("Requested training on CPU. This will be slow.")
    self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
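# `grad_accum` is only stored here; the training loop (not shown) presumably
# steps the optimizer every `grad_accum` batches, accumulating gradients in
# between to simulate a larger effective batch size.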
def __init__(self, model_params, **kwargs): """Create a Trainer, and give it the parameters needed to instantiate the model :param model_params: The model parameters :param kwargs: See below :Keyword Arguments: * *nsteps* (`int`) -- If we should report every n-steps, this should be passed * *ema_decay* (`float`) -- If we are doing an exponential moving average, what decay to us4e * *clip* (`int`) -- If we are doing gradient clipping, what value to use * *optim* (`str`) -- The name of the optimizer we are using * *lr* (`float`) -- The learning rate we are using * *mom* (`float`) -- If we are using SGD, what value to use for momentum * *beta1* (`float`) -- Adam-specific hyper-param, defaults to `0.9` * *beta2* (`float`) -- Adam-specific hyper-param, defaults to `0.999` * *epsilon* (`float`) -- Adam-specific hyper-param, defaults to `1e-8 """ super().__init__() if type(model_params) is dict: self.model = create_model_for('classify', **model_params) else: self.model = model_params self.sess = self.model.sess self.loss = self.model.create_loss() self.test_loss = self.model.create_test_loss() self.global_step, train_op = optimizer( self.loss, colocate_gradients_with_ops=True, variables=self.model.trainable_variables, **kwargs) self.nsteps = kwargs.get('nsteps', six.MAXSIZE) decay = kwargs.get('ema_decay', None) if decay is not None: self.ema = True ema_op, self.ema_load, self.ema_restore = _add_ema( self.model, float(decay)) with tf.compat.v1.control_dependencies([ema_op]): self.train_op = tf.identity(train_op) else: self.ema = False self.train_op = train_op tables = tf.compat.v1.tables_initializer() self.model.sess.run(tables) self.model.sess.run(tf.compat.v1.global_variables_initializer()) self.model.set_saver(tf.compat.v1.train.Saver()) checkpoint = kwargs.get('checkpoint') if checkpoint is not None: skip_blocks = kwargs.get('blocks_to_skip', ['OptimizeLoss']) reload_checkpoint(self.model.sess, checkpoint, skip_blocks)
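# Note on the EMA branch above: `_add_ema` (defined elsewhere in this file)
# returns an op that updates shadow moving averages of the trainable variables,
# plus ops to load the averaged weights (for evaluation) and to restore the raw
# ones. Wrapping `train_op` in tf.identity under a control dependency on
# `ema_op` makes every training step advance the averages as a side effect.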
def __init__(self, model_params, **kwargs): """Create a Trainer, and give it the parameters needed to instantiate the model :param model_params: The model parameters :param kwargs: See below :Keyword Arguments: * *nsteps* (`int`) -- If we should report every n-steps, this should be passed * *ema_decay* (`float`) -- If we are doing an exponential moving average, what decay to us4e * *clip* (`int`) -- If we are doing gradient clipping, what value to use * *optim* (`str`) -- The name of the optimizer we are using * *lr* (`float`) -- The learning rate we are using * *mom* (`float`) -- If we are using SGD, what value to use for momentum * *beta1* (`float`) -- Adam-specific hyper-param, defaults to `0.9` * *beta2* (`float`) -- Adam-specific hyper-param, defaults to `0.999` * *epsilon* (`float`) -- Adam-specific hyper-param, defaults to `1e-8 * *tgt_rlut* (`dict`) -- This is a dictionary that converts from ints back to strings, used for predictions * *beam* (`int`) -- The beam size to use at prediction time, defaults to `10` """ super().__init__() if type(model_params) is dict: self.model = create_model_for('seq2seq', **model_params) else: self.model = model_params self.sess = self.model.sess self.loss = self.model.create_loss() self.test_loss = self.model.create_test_loss() self.tgt_rlut = kwargs['tgt_rlut'] self.base_dir = kwargs['basedir'] self.global_step, self.train_op = optimizer( self.loss, colocate_gradients_with_ops=True, variables=self.model.trainable_variables, **kwargs) self.nsteps = kwargs.get('nsteps', 500) self.beam = kwargs.get('beam', 10) tables = tf.compat.v1.tables_initializer() self.model.sess.run(tables) self.model.sess.run(tf.compat.v1.global_variables_initializer()) self.model.set_saver(tf.compat.v1.train.Saver()) self.bleu_n_grams = int(kwargs.get("bleu_n_grams", 4)) init = tf.compat.v1.global_variables_initializer() self.model.sess.run(init) checkpoint = kwargs.get('checkpoint') if checkpoint is not None: skip_blocks = kwargs.get('blocks_to_skip', ['OptimizeLoss']) reload_checkpoint(self.model.sess, checkpoint, skip_blocks)
def __init__(self, model_params, **kwargs):
    super().__init__()
    if type(model_params) is dict:
        self.model = create_model_for('lm', **model_params)
    else:
        self.model = model_params
    loss_fn = loss_with_state if self.model.requires_state else loss_without_state
    self.optimizer = EagerOptimizer(loss_fn, **kwargs)
    self.nsteps = kwargs.get('nsteps', 500)
    self._checkpoint = tf.train.Checkpoint(optimizer=self.optimizer.optimizer, model=self.model)
    checkpoint_dir = '{}-{}'.format("./tf-lm", os.getpid())
    self.checkpoint_manager = tf.train.CheckpointManager(
        self._checkpoint, directory=checkpoint_dir, max_to_keep=5)
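# `loss_with_state` and `loss_without_state` are module-level functions defined
# elsewhere in this file; the split exists because recurrent LMs thread hidden
# state through the loss computation while stateless models do not. A rough,
# assumed shape of the two signatures:
#
#   def loss_without_state(model, x, y):  # -> scalar loss
#   def loss_with_state(model, h, x, y):  # -> (scalar loss, new hidden state)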
def __init__(self, model_params, **kwargs): """Create a Trainer, and give it the parameters needed to instantiate the model :param model_params: The model parameters :param kwargs: See below :Keyword Arguments: * *nsteps* (`int`) -- If we should report every n-steps, this should be passed * *ema_decay* (`float`) -- If we are doing an exponential moving average, what decay to us4e * *clip* (`int`) -- If we are doing gradient clipping, what value to use * *optim* (`str`) -- The name of the optimizer we are using * *lr* (`float`) -- The learning rate we are using * *mom* (`float`) -- If we are using SGD, what value to use for momentum * *beta1* (`float`) -- Adam-specific hyper-param, defaults to `0.9` * *beta2* (`float`) -- Adam-specific hyper-param, defaults to `0.999` * *epsilon* (`float`) -- Adam-specific hyper-param, defaults to `1e-8 """ super().__init__() if type(model_params) is dict: self.model = create_model_for('tagger', **model_params) else: self.model = model_params self.sess = self.model.sess self.loss = self.model.create_loss() span_type = kwargs.get('span_type', 'iob') verbose = kwargs.get('verbose', False) self.evaluator = TaggerEvaluatorTf(self.model, span_type, verbose) self.global_step, self.train_op = optimizer( self.loss, colocate_gradients_with_ops=True, variables=self.model.trainable_variables, **kwargs) self.nsteps = kwargs.get('nsteps', six.MAXSIZE) tables = tf.compat.v1.tables_initializer() self.model.sess.run(tables) init = tf.compat.v1.global_variables_initializer() self.model.sess.run(init) saver = tf.compat.v1.train.Saver() self.model.save_using(saver) checkpoint = kwargs.get('checkpoint') if checkpoint is not None: skip_blocks = kwargs.get('blocks_to_skip', ['OptimizeLoss']) reload_checkpoint(self.model.sess, checkpoint, skip_blocks)
def __init__(self, model, **kwargs):
    super().__init__()
    if type(model) is dict:
        model = create_model_for('lm', **model)
    self.model = model
    self.clip = float(kwargs.get('clip', 5))
    self.gpus = kwargs.get('gpus', 1)
    if self.gpus > 0:
        self.crit = model.create_loss().cuda()
        if self.gpus > 1:
            self.model = torch.nn.DataParallel(model).cuda()
        else:
            self.model.cuda()
    else:
        logger.warning("Requested training on CPU. This will be slow.")
        self.crit = model.create_loss()
    self.nsteps = kwargs.get('nsteps', 500)
    self.optimizer = OptimizerManager(self.model, **kwargs)
def __init__(self, model_params, **kwargs):
    super().__init__()
    if type(model_params) is dict:
        self.model = create_model_for('seq2seq', **model_params)
    else:
        self.model = model_params
    self.tgt_rlut = kwargs['tgt_rlut']
    self.loss = Seq2SeqLoss(**kwargs)
    self.optimizer = EagerOptimizer(self.loss, **kwargs)
    self.nsteps = kwargs.get('nsteps', 500)
    self._checkpoint = tf.train.Checkpoint(optimizer=self.optimizer.optimizer, model=self.model)
    checkpoint_dir = '{}-{}'.format("./tf-seq2seq", os.getpid())
    self.bleu_n_grams = int(kwargs.get("bleu_n_grams", 4))
    self.checkpoint_manager = tf.train.CheckpointManager(
        self._checkpoint, directory=checkpoint_dir, max_to_keep=5)
def __init__(self, model_params, **kwargs):
    super().__init__()
    if type(model_params) is dict:
        self.model = create_model_for('seq2seq', **model_params)
    else:
        self.model = model_params
    self.tgt_rlut = kwargs['tgt_rlut']
    self.loss = Seq2SeqLoss(**kwargs)
    self.optimizer = EagerOptimizer(self.loss, **kwargs)
    self.nsteps = kwargs.get('nsteps', 500)
    checkpoint_dir = kwargs.get('checkpoint')
    if checkpoint_dir is None:
        checkpoint_dir = f'./tf-seq2seq-{os.getpid()}'
    self._checkpoint, self.checkpoint_manager = setup_tf2_checkpoints(
        self.optimizer, self.model, checkpoint_dir)
    self.bleu_n_grams = int(kwargs.get("bleu_n_grams", 4))
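# This constructor matches the previous one except that the manual
# tf.train.Checkpoint / tf.train.CheckpointManager wiring is replaced by the
# `setup_tf2_checkpoints` helper, and the checkpoint directory can be supplied
# via the `checkpoint` kwarg instead of always being derived from the pid.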
def __init__(self, model_params, **kwargs):
    super().__init__()
    if type(model_params) is dict:
        self.model = create_model_for('seq2seq', **model_params)
    else:
        self.model = model_params
    self.tgt_rlut = kwargs['tgt_rlut']
    self.optimizer = EagerOptimizer(loss, **kwargs)
    self.nsteps = kwargs.get('nsteps', 500)
    self._checkpoint = tf.train.Checkpoint(optimizer=self.optimizer.optimizer, model=self.model)
    checkpoint_dir = '{}-{}'.format("./tf-seq2seq", os.getpid())
    self.checkpoint_manager = tf.train.CheckpointManager(
        self._checkpoint, directory=checkpoint_dir, max_to_keep=5)
    strategy_type = kwargs.get('strategy_type', 'mirror')
    gpus = int(kwargs.get('gpus', 1))
    endpoint = kwargs.get('endpoint')
    self.strategy = create_distribute_strategy(strategy_type, gpus, endpoint)
    self.bleu_n_grams = int(kwargs.get("bleu_n_grams", 4))
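# A hedged note on the distributed variant above: `create_distribute_strategy`
# (defined elsewhere) presumably maps 'mirror' to tf.distribute.MirroredStrategy
# across `gpus` devices, with `endpoint` used for remote strategies. The
# training loop (not shown) would then invoke its step function via
# `self.strategy.run(...)` so gradients are aggregated across replicas.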
def __init__(self, model_params, **kwargs):
    super().__init__()
    if type(model_params) is dict:
        self.model = create_model_for('lm', **model_params)
    else:
        self.model = model_params
    self.sess = self.model.sess
    self.loss = self.model.create_loss()
    self.test_loss = self.model.create_test_loss()
    self.global_step, self.train_op = optimizer(
        self.loss, colocate_gradients_with_ops=True,
        variables=self.model.trainable_variables, **kwargs)
    self.nsteps = kwargs.get('nsteps', 500)
    init = tf.compat.v1.global_variables_initializer()
    self.model.sess.run(init)
    saver = tf.compat.v1.train.Saver()
    self.model.set_saver(saver)
    checkpoint = kwargs.get('checkpoint')
    if checkpoint is not None:
        skip_blocks = kwargs.get('blocks_to_skip', ['OptimizeLoss'])
        reload_checkpoint(self.model.sess, checkpoint, skip_blocks)
def create_servable_embeddings_model(embeddings, **kwargs):
    return create_model_for('servable_embeddings', embeddings, None, **kwargs)
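# Usage sketch (hedged): `embeddings` is the dict of embedding objects, keyed
# by feature name, that the model registry expects; the second positional
# argument to create_model_for is the labels slot, unused for servable
# embeddings. The `word_embeddings` name below is illustrative:
#
#   model = create_servable_embeddings_model({'word': word_embeddings})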