def __init__(self, model_params, **kwargs):
    """Create a Trainer, and give it the parameters needed to instantiate the model

    :param model_params: The model parameters (a config `dict` to build from, or an
        already-constructed model object)
    :param kwargs: See below

    :Keyword Arguments:

    * *nsteps* (`int`) -- If we should report every n-steps, this should be passed
    * *ema_decay* (`float`) -- If we are doing an exponential moving average, what decay to use
    * *clip* (`int`) -- If we are doing gradient clipping, what value to use
    * *optim* (`str`) -- The name of the optimizer we are using
    * *lr* (`float`) -- The learning rate we are using
    * *mom* (`float`) -- If we are using SGD, what value to use for momentum
    * *beta1* (`float`) -- Adam-specific hyper-param, defaults to `0.9`
    * *beta2* (`float`) -- Adam-specific hyper-param, defaults to `0.999`
    * *epsilon* (`float`) -- Adam-specific hyper-param, defaults to `1e-8`
    """
    super().__init__()
    # Accept either a config dict (build the model here) or a pre-built model
    if isinstance(model_params, dict):
        self.model = create_model_for('classify', **model_params)
    else:
        self.model = model_params
    self.sess = self.model.sess
    self.loss = self.model.create_loss()
    self.test_loss = self.model.create_test_loss()
    self.global_step, train_op = optimizer(
        self.loss, colocate_gradients_with_ops=True,
        variables=self.model.trainable_variables, **kwargs)
    # Default to "never" (report only at epoch boundaries) if nsteps not given
    self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
    decay = kwargs.get('ema_decay', None)
    if decay is not None:
        self.ema = True
        ema_op, self.ema_load, self.ema_restore = _add_ema(self.model, float(decay))
        # Make the EMA update run as a side effect of every training step
        with tf.compat.v1.control_dependencies([ema_op]):
            self.train_op = tf.identity(train_op)
    else:
        self.ema = False
        self.train_op = train_op

    tables = tf.compat.v1.tables_initializer()
    self.model.sess.run(tables)
    self.model.sess.run(tf.compat.v1.global_variables_initializer())
    self.model.set_saver(tf.compat.v1.train.Saver())
    checkpoint = kwargs.get('checkpoint')
    if checkpoint is not None:
        # Warm-start from a checkpoint, skipping optimizer state by default
        skip_blocks = kwargs.get('blocks_to_skip', ['OptimizeLoss'])
        reload_checkpoint(self.model.sess, checkpoint, skip_blocks)
def __init__(self, model_params, **kwargs):
    """Create a Trainer, and give it the parameters needed to instantiate the model

    :param model_params: The model parameters (a config `dict` to build from, or an
        already-constructed model object)
    :param kwargs: See below

    :Keyword Arguments:

    * *nsteps* (`int`) -- If we should report every n-steps, this should be passed
    * *ema_decay* (`float`) -- If we are doing an exponential moving average, what decay to use
    * *clip* (`int`) -- If we are doing gradient clipping, what value to use
    * *optim* (`str`) -- The name of the optimizer we are using
    * *lr* (`float`) -- The learning rate we are using
    * *mom* (`float`) -- If we are using SGD, what value to use for momentum
    * *beta1* (`float`) -- Adam-specific hyper-param, defaults to `0.9`
    * *beta2* (`float`) -- Adam-specific hyper-param, defaults to `0.999`
    * *epsilon* (`float`) -- Adam-specific hyper-param, defaults to `1e-8`
    * *tgt_rlut* (`dict`) -- This is a dictionary that converts from ints back to strings, used for predictions
    * *beam* (`int`) -- The beam size to use at prediction time, defaults to `10`
    """
    super().__init__()
    # Accept either a config dict (build the model here) or a pre-built model
    if isinstance(model_params, dict):
        self.model = create_model_for('seq2seq', **model_params)
    else:
        self.model = model_params
    self.sess = self.model.sess
    self.loss = self.model.create_loss()
    self.test_loss = self.model.create_test_loss()
    # Required: reverse lookup table (int -> token) and output directory
    self.tgt_rlut = kwargs['tgt_rlut']
    self.base_dir = kwargs['basedir']
    self.global_step, self.train_op = optimizer(
        self.loss, colocate_gradients_with_ops=True,
        variables=self.model.trainable_variables, **kwargs)
    self.nsteps = kwargs.get('nsteps', 500)
    self.beam = kwargs.get('beam', 10)
    self.bleu_n_grams = int(kwargs.get("bleu_n_grams", 4))

    tables = tf.compat.v1.tables_initializer()
    self.model.sess.run(tables)
    # NOTE: the original ran global_variables_initializer twice; once is sufficient
    self.model.sess.run(tf.compat.v1.global_variables_initializer())
    self.model.set_saver(tf.compat.v1.train.Saver())
    checkpoint = kwargs.get('checkpoint')
    if checkpoint is not None:
        # Warm-start from a checkpoint, skipping optimizer state by default
        skip_blocks = kwargs.get('blocks_to_skip', ['OptimizeLoss'])
        reload_checkpoint(self.model.sess, checkpoint, skip_blocks)
def __init__(self, model_params, **kwargs):
    """Create a Trainer, and give it the parameters needed to instantiate the model

    :param model_params: The model parameters (a config `dict` to build from, or an
        already-constructed model object)
    :param kwargs: See below

    :Keyword Arguments:

    * *nsteps* (`int`) -- If we should report every n-steps, this should be passed
    * *ema_decay* (`float`) -- If we are doing an exponential moving average, what decay to use
    * *clip* (`int`) -- If we are doing gradient clipping, what value to use
    * *optim* (`str`) -- The name of the optimizer we are using
    * *lr* (`float`) -- The learning rate we are using
    * *mom* (`float`) -- If we are using SGD, what value to use for momentum
    * *beta1* (`float`) -- Adam-specific hyper-param, defaults to `0.9`
    * *beta2* (`float`) -- Adam-specific hyper-param, defaults to `0.999`
    * *epsilon* (`float`) -- Adam-specific hyper-param, defaults to `1e-8`
    """
    super().__init__()
    # Accept either a config dict (build the model here) or a pre-built model
    if isinstance(model_params, dict):
        self.model = create_model_for('tagger', **model_params)
    else:
        self.model = model_params
    self.sess = self.model.sess
    self.loss = self.model.create_loss()
    # Span-based evaluation (F1 over entity spans); 'iob' is the default scheme
    span_type = kwargs.get('span_type', 'iob')
    verbose = kwargs.get('verbose', False)
    self.evaluator = TaggerEvaluatorTf(self.model, span_type, verbose)
    self.global_step, self.train_op = optimizer(
        self.loss, colocate_gradients_with_ops=True,
        variables=self.model.trainable_variables, **kwargs)
    # Default to "never" (report only at epoch boundaries) if nsteps not given
    self.nsteps = kwargs.get('nsteps', six.MAXSIZE)

    tables = tf.compat.v1.tables_initializer()
    self.model.sess.run(tables)
    init = tf.compat.v1.global_variables_initializer()
    self.model.sess.run(init)
    saver = tf.compat.v1.train.Saver()
    # NOTE(review): sibling trainers call set_saver(); presumably save_using is
    # the tagger model's equivalent API -- confirm against the model class
    self.model.save_using(saver)
    checkpoint = kwargs.get('checkpoint')
    if checkpoint is not None:
        # Warm-start from a checkpoint, skipping optimizer state by default
        skip_blocks = kwargs.get('blocks_to_skip', ['OptimizeLoss'])
        reload_checkpoint(self.model.sess, checkpoint, skip_blocks)
def __init__(self, model_params, **kwargs):
    """Create a Trainer, and give it the parameters needed to instantiate the model

    :param model_params: The model parameters (a config `dict` to build from, or an
        already-constructed model object)
    :param kwargs: See below

    :Keyword Arguments:

    * *nsteps* (`int`) -- If we should report every n-steps, defaults to `500`
    * *checkpoint* (`str`) -- If given, warm-start from this checkpoint
    * *blocks_to_skip* (`list`) -- Variable scopes to skip on checkpoint reload,
      defaults to `['OptimizeLoss']`
    """
    super().__init__()
    # Accept either a config dict (build the model here) or a pre-built model
    if isinstance(model_params, dict):
        self.model = create_model_for('lm', **model_params)
    else:
        self.model = model_params
    self.sess = self.model.sess
    self.loss = self.model.create_loss()
    self.test_loss = self.model.create_test_loss()
    self.global_step, self.train_op = optimizer(
        self.loss, colocate_gradients_with_ops=True,
        variables=self.model.trainable_variables, **kwargs)
    self.nsteps = kwargs.get('nsteps', 500)

    init = tf.compat.v1.global_variables_initializer()
    self.model.sess.run(init)
    saver = tf.compat.v1.train.Saver()
    self.model.set_saver(saver)
    checkpoint = kwargs.get('checkpoint')
    if checkpoint is not None:
        # Warm-start from a checkpoint, skipping optimizer state by default
        skip_blocks = kwargs.get('blocks_to_skip', ['OptimizeLoss'])
        reload_checkpoint(self.model.sess, checkpoint, skip_blocks)