Example #1
    def __init__(self, model_params, **kwargs):
        """Create a Trainer, and give it the parameters needed to instantiate the model

        :param model_params: The model parameters
        :param kwargs: See below

        :Keyword Arguments:

          * *nsteps* (`int`) -- Report progress every `nsteps` steps, if passed
          * *ema_decay* (`float`) -- If we are doing an exponential moving average, what decay to use
          * *clip* (`int`) -- If we are doing gradient clipping, what value to use
          * *optim* (`str`) -- The name of the optimizer we are using
          * *lr* (`float`) -- The learning rate we are using
          * *mom* (`float`) -- If we are using SGD, what value to use for momentum
          * *beta1* (`float`) -- Adam-specific hyper-param, defaults to `0.9`
          * *beta2* (`float`) -- Adam-specific hyper-param, defaults to `0.999`
          * *epsilon* (`float`) -- Adam-specific hyper-param, defaults to `1e-8`

        """
        super().__init__()
        if type(model_params) is dict:
            self.model = create_model_for('tagger', **model_params)
        else:
            self.model = model_params
        span_type = kwargs.get('span_type', 'iob')
        verbose = kwargs.get('verbose', False)
        self.evaluator = TaggerEvaluatorEagerTf(self.model, span_type, verbose)
        self.optimizer = EagerOptimizer(loss, **kwargs)
        self.nsteps = kwargs.get('nsteps', six.MAXSIZE)
        checkpoint_dir = kwargs.get('checkpoint')
        if checkpoint_dir is None:
            checkpoint_dir = f'./tf-tagger-{os.getpid()}'
        self._checkpoint, self.checkpoint_manager = setup_tf2_checkpoints(
            self.optimizer, self.model, checkpoint_dir)
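
A minimal instantiation sketch for the constructor above (the class name `TaggerTrainerEagerTf` and the prebuilt `model` are assumptions; the keyword arguments come from the docstring and the constructor body):

trainer = TaggerTrainerEagerTf(
    model,                # an already-constructed tagger model (skips create_model_for)
    nsteps=100,           # report every 100 steps
    optim='sgd', lr=0.01, mom=0.9,
    clip=5,               # gradient clipping value
    span_type='iobes',    # span encoding consumed by the evaluator
    checkpoint='./ckpt')  # otherwise defaults to ./tf-tagger-<pid>
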
Example #2
    def __init__(self, model_params, **kwargs):
        super().__init__()

        if type(model_params) is dict:
            self.model = create_model_for('lm', **model_params)
        else:
            self.model = model_params

        loss_fn = loss_with_state if self.model.requires_state else loss_without_state
        self.optimizer = EagerOptimizer(loss_fn, **kwargs)
        self.nsteps = kwargs.get('nsteps', 500)
        self._checkpoint = tf.train.Checkpoint(
            optimizer=self.optimizer.optimizer, model=self.model)
        checkpoint_dir = '{}-{}'.format("./tf-lm", os.getpid())

        self.checkpoint_manager = tf.train.CheckpointManager(
            self._checkpoint, directory=checkpoint_dir, max_to_keep=5)
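
Here the loss function is selected by `model.requires_state`: the two expected signatures differ only in whether they thread hidden state through each step. A sketch of both (with `compute_loss` as a hypothetical stand-in for the real cross-entropy; Example #7 below shows a concrete stateful loss):

def loss_without_state(model, x, y):
    logits = model(x)
    return compute_loss(logits, y)     # scalar loss

def loss_with_state(model, h, x, y):
    x['h'] = h                         # feed the previous hidden state in
    logits, h = model(x)
    return compute_loss(logits, y), h  # loss plus the updated state
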
Example #3
    def __init__(self, model_params, **kwargs):
        super().__init__()

        if type(model_params) is dict:
            self.model = create_model_for('seq2seq', **model_params)
        else:
            self.model = model_params

        self.tgt_rlut = kwargs['tgt_rlut']
        self.loss = Seq2SeqLoss(**kwargs)
        self.optimizer = EagerOptimizer(self.loss, **kwargs)
        self.nsteps = kwargs.get('nsteps', 500)
        self._checkpoint = tf.train.Checkpoint(
            optimizer=self.optimizer.optimizer, model=self.model)
        checkpoint_dir = '{}-{}'.format("./tf-seq2seq", os.getpid())
        self.bleu_n_grams = int(kwargs.get("bleu_n_grams", 4))

        self.checkpoint_manager = tf.train.CheckpointManager(
            self._checkpoint, directory=checkpoint_dir, max_to_keep=5)
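
With the `tf.train.CheckpointManager` wired manually like this, resuming from the newest checkpoint is a separate step the snippet itself does not take; a sketch using the standard `tf.train` API, written as it would appear inside the trainer:

latest = self.checkpoint_manager.latest_checkpoint
if latest is not None:
    self._checkpoint.restore(latest)  # resume optimizer state and model weights
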
Example #4
    def __init__(self, model_params, **kwargs):
        super().__init__()

        if type(model_params) is dict:
            self.model = create_model_for('seq2seq', **model_params)
        else:
            self.model = model_params

        self.tgt_rlut = kwargs['tgt_rlut']
        self.loss = Seq2SeqLoss(**kwargs)
        self.optimizer = EagerOptimizer(self.loss, **kwargs)
        self.nsteps = kwargs.get('nsteps', 500)

        checkpoint_dir = kwargs.get('checkpoint')
        if checkpoint_dir is None:
            checkpoint_dir = f'./tf-seq2seq-{os.getpid()}'
        self._checkpoint, self.checkpoint_manager = setup_tf2_checkpoints(
            self.optimizer, self.model, checkpoint_dir)

        self.bleu_n_grams = int(kwargs.get("bleu_n_grams", 4))
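
`setup_tf2_checkpoints` bundles the `tf.train.Checkpoint`/`CheckpointManager` wiring that Examples #2 and #3 do by hand. A plausible equivalent is sketched below (an assumption about its behavior, not the helper's actual source):

def setup_tf2_checkpoints(optimizer, model, checkpoint_dir, max_to_keep=5):
    checkpoint = tf.train.Checkpoint(optimizer=optimizer.optimizer, model=model)
    manager = tf.train.CheckpointManager(
        checkpoint, directory=checkpoint_dir, max_to_keep=max_to_keep)
    # restore() with None is a no-op, so a fresh run starts cleanly
    checkpoint.restore(manager.latest_checkpoint)
    return checkpoint, manager
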
Example #5
    def __init__(self, model_params, **kwargs):
        super().__init__()
        if type(model_params) is dict:
            self.model = create_model_for('seq2seq', **model_params)
        else:
            self.model = model_params

        self.tgt_rlut = kwargs['tgt_rlut']
        self.optimizer = EagerOptimizer(loss, **kwargs)
        self.nsteps = kwargs.get('nsteps', 500)
        self._checkpoint = tf.train.Checkpoint(
            optimizer=self.optimizer.optimizer, model=self.model)
        checkpoint_dir = '{}-{}'.format("./tf-seq2seq", os.getpid())

        self.checkpoint_manager = tf.train.CheckpointManager(
            self._checkpoint, directory=checkpoint_dir, max_to_keep=5)
        strategy_type = kwargs.get('strategy_type', 'mirror')
        gpus = int(kwargs.get('gpus', 1))
        endpoint = kwargs.get('endpoint')
        self.strategy = create_distribute_strategy(strategy_type, gpus,
                                                   endpoint)
        self.bleu_n_grams = int(kwargs.get("bleu_n_grams", 4))
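
`create_distribute_strategy` maps the string arguments onto a `tf.distribute` strategy. A plausible reading (an assumption, not the library's implementation):

def create_distribute_strategy(strategy_type, gpus=1, endpoint=None):
    if strategy_type == 'tpu':
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=endpoint)
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        return tf.distribute.TPUStrategy(resolver)
    # 'mirror': replicate the model across the requested local GPUs
    devices = ['/device:GPU:{}'.format(i) for i in range(gpus)]
    return tf.distribute.MirroredStrategy(devices=devices)
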
Example #6
model = to_device(
    L.EmbedPoolStackModel(
        2, L.EmbeddingsStack(embeddings),
        L.WithoutLength(L.ParallelConv(None, args.poolsz, args.filts)),
        L.Highway(stacksz)))


def loss(model, x, y):
    y_ = model(x)
    return tf.compat.v1.losses.sparse_softmax_cross_entropy(labels=y,
                                                            logits=y_)


# This works with TF 2.0 and PyTorch:
optimizer = EagerOptimizer(loss, optim="adam", lr=0.001)


@tf.function
def train_step(optimizer, model, x, y):
    loss_value = optimizer.update(model, x, y)
    return loss_value


for epoch in range(num_epochs):
    loss_acc = 0.
    step = 0
    start = time.time()
    for x, y in train_set.get_input(training=True):
        loss_value = train_step(optimizer, model, x, y)
        loss_acc += loss_value
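        # (snippet truncated here; the remaining per-epoch bookkeeping
        # presumably mirrors Example #7 -- a sketch, report format assumed)
        step += 1
    print('Epoch {} mean loss {:.4f}, training time {}'.format(
        epoch + 1, loss_acc / step, time.time() - start))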
Example #7

def loss(model, h, x, y):
    x["h"] = h  # thread the previous hidden state into the input dict
    logits, h = model(x)
    vsz = embeddings["word"].get_vsz()
    targets = tf.reshape(y, [-1])  # flatten [B, T] targets to [B*T]
    bt_x_v = tf.nn.log_softmax(tf.reshape(logits, [-1, vsz]), axis=-1)
    one_hots = tf.one_hot(targets, vsz)
    example_loss = -tf.reduce_sum(one_hots * bt_x_v, axis=-1)  # per-token NLL
    loss = tf.reduce_mean(example_loss)
    return loss, h  # mean loss plus the updated hidden state


optimizer = EagerOptimizer(loss, optim="adam", lr=args.lr)
for epoch in range(args.epochs):

    loss_accum = 0.
    step = 0
    start = time.time()
    h = None

    SET_TRAIN_FLAG(True)

    for x, y in train_input_fn():
        # Optimize the model
        loss_value, h = optimizer.update_with_hidden(model, h, x, y)
        loss_accum += loss_value
        step += 1
    print('training time {}'.format(time.time() - start))
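    # Sketch (assumption): since loss_value above is the mean per-token
    # negative log-likelihood, epoch perplexity is exp(mean loss);
    # requires `import math`.
    print('Epoch {} train ppl {:.2f}'.format(epoch + 1, math.exp(loss_accum / step)))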