Example #1
    def _init_optimiser(self):

        self.current_lr = self._hparams.learning_rate

        # Mask out padded timesteps so they do not contribute to the loss.
        self._loss_weights = tf.sequence_mask(lengths=self._labels_length,
                                              dtype=self._hparams.dtype)

        self.batch_loss = seq2seq.sequence_loss(
            logits=self.decoder_train_outputs,
            targets=self._labels,
            weights=self._loss_weights,
            softmax_loss_function=None,
            average_across_batch=True,
            average_across_timesteps=True)

        self.reg_loss = 0

        if self._hparams.recurrent_l2_regularisation is not None:
            regularisable_vars = _get_trainable_vars(self._hparams.cell_type)
            reg = tf.contrib.layers.l2_regularizer(
                scale=self._hparams.recurrent_l2_regularisation)
            self.reg_loss = tf.contrib.layers.apply_regularization(
                reg, regularisable_vars)

        self.batch_loss = self.batch_loss + self.reg_loss

        if self._hparams.optimiser == 'Adam':
            optimiser = tf.train.AdamOptimizer(
                learning_rate=self.current_lr,
                epsilon=1e-8 if self._hparams.dtype == tf.float32 else 1e-4,
            )
        elif self._hparams.optimiser == 'AdamW':
            from tensorflow.contrib.opt import AdamWOptimizer
            optimiser = AdamWOptimizer(
                learning_rate=self.current_lr,
                weight_decay=self._hparams.weight_decay,
                epsilon=1e-8 if self._hparams.dtype == tf.float32 else 1e-4,
            )
        elif self._hparams.optimiser == 'Momentum':
            optimiser = tf.train.MomentumOptimizer(
                learning_rate=self.current_lr,
                momentum=0.9,
                use_nesterov=False)
        else:
            raise Exception('Unsupported optimiser, try Adam')

        variables = tf.trainable_variables()
        gradients = tf.gradients(self.batch_loss, variables)

        if self._hparams.clip_gradients is True:
            gradients, _ = tf.clip_by_global_norm(
                gradients, self._hparams.max_gradient_norm)

        self.train_op = optimiser.apply_gradients(
            grads_and_vars=zip(gradients, variables),
            global_step=tf.train.get_global_step())
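The helper _get_trainable_vars referenced above is not part of the snippet. A minimal sketch of what such a helper could look like, assuming it simply filters the trainable variables by cell-type name so that only the recurrent kernels are L2-regularised (the project's actual implementation may differ):

import tensorflow as tf

def _get_trainable_vars(cell_type):
    # Hypothetical helper: keep trainable variables whose names mention the
    # recurrent cell type (e.g. 'lstm' or 'gru'), skipping bias terms so that
    # only the kernels receive L2 regularisation.
    return [var for var in tf.trainable_variables()
            if cell_type.lower() in var.name.lower()
            and 'bias' not in var.name.lower()]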
Example #2
    def _init_optimiser(self):
        r"""
        Computes the batch_loss function to be minimised
        """

        self._init_lr_decay()

        self._loss_weights = tf.sequence_mask(
            lengths=self._labels_len,
            dtype=self._hparams.dtype
        )

        if self._hparams.loss_fun is None:
            softmax_loss_fun = None
        elif self._hparams.loss_fun == 'focal_loss':
            softmax_loss_fun = focal_loss
        elif self._hparams.loss_fun == 'mc_loss':
            softmax_loss_fun = mc_loss
        else:
            raise ValueError('Unknown loss function {}'.format(self._hparams.loss_fun))

        self.batch_loss = seq2seq.sequence_loss(
            logits=self._basic_decoder_train_outputs.rnn_output,
            targets=self._labels,
            weights=self._loss_weights,
            softmax_loss_function=softmax_loss_fun,
            average_across_batch=True,
            average_across_timesteps=True)

        reg_loss = 0

        if self._hparams.recurrent_l2_regularisation is not None:
            regularisable_vars = _get_trainable_vars(self._hparams.cell_type)
            reg = tf.contrib.layers.l2_regularizer(scale=self._hparams.recurrent_l2_regularisation)
            reg_loss = tf.contrib.layers.apply_regularization(reg, regularisable_vars)

        if self._hparams.video_processing is not None:
            if 'cnn' in self._hparams.video_processing:
                # we regularise the cnn vars by specifying a regulariser in conv2d
                reg_variables = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
                reg_loss += tf.reduce_sum(reg_variables)

        self.batch_loss = self.batch_loss + reg_loss

        if self._hparams.loss_scaling > 1:
            self.batch_loss *= self._hparams.loss_scaling

        if self._hparams.optimiser == 'Adam':
            optimiser = tf.train.AdamOptimizer(
                learning_rate=self.current_lr,
                epsilon=1e-8 if self._hparams.dtype == tf.float32 else 1e-4,
            )
        elif self._hparams.optimiser == 'AdamW':
            from tensorflow.contrib.opt import AdamWOptimizer
            optimiser = AdamWOptimizer(
                learning_rate=self.current_lr,
                weight_decay=self._hparams.weight_decay,
                epsilon=1e-8 if self._hparams.dtype == tf.float32 else 1e-4,
            )
        elif self._hparams.optimiser == 'Momentum':
            optimiser = tf.train.MomentumOptimizer(
                learning_rate=self.current_lr,
                momentum=0.9,
                use_nesterov=False
            )
        elif self._hparams.optimiser == 'AMSGrad':
            from .AMSGrad import AMSGrad
            optimiser = AMSGrad(
                learning_rate=self.current_lr,
                epsilon=1e-8 if self._hparams.dtype == tf.float32 else 1e-4,
            )
        else:
            raise Exception('Unsupported optimiser, try Adam')

        variables = tf.trainable_variables()
        gradients = tf.gradients(self.batch_loss, variables)

        if self._hparams.loss_scaling > 1:
            gradients = [tf.div(grad, self._hparams.loss_scaling) for grad in gradients]

        if self._hparams.clip_gradients is True:
            gradients, _ = tf.clip_by_global_norm(gradients, self._hparams.max_gradient_norm)

        if self._hparams.batch_normalisation is True:
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.train_op = optimiser.apply_gradients(
                    grads_and_vars=zip(gradients, variables),
                    global_step=tf.train.get_global_step())
        else:
            self.train_op = optimiser.apply_gradients(
                grads_and_vars=zip(gradients, variables),
                global_step=tf.train.get_global_step())
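The focal_loss and mc_loss callables used above are imported elsewhere in the project and are not shown. seq2seq.sequence_loss only requires a softmax_loss_function invoked with keyword arguments (labels, logits) that returns one loss value per timestep. A minimal focal-loss sketch under that contract (the gamma value is illustrative, not taken from the source):

import tensorflow as tf

def focal_loss(labels, logits, gamma=2.0):
    # Standard focal loss: cross-entropy down-weighted for well-classified
    # examples. `labels` are sparse class ids and `logits` are unnormalised
    # scores, matching how seq2seq.sequence_loss calls softmax_loss_function.
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                        logits=logits)
    p_t = tf.exp(-ce)  # probability assigned to the true class
    return tf.pow(1.0 - p_t, gamma) * ce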
Example #3
    def _init_optimiser(self):
        r"""
                Computes the batch_loss function to be minimised
                """

        self._init_lr_decay()

        self._loss_weights = tf.sequence_mask(
            lengths=self._decoder._labels_len, dtype=self._hparams.dtype)

        if self._hparams.loss_fun is None:
            if self._hparams.label_smoothing <= 0.0:
                softmax_loss_fun = None
            else:
                print(
                    'Using the slower "softmax_cross_entropy" instead of "sparse_softmax_cross_entropy" '
                    'since label smoothing is nonzero')
                from .devel import smoothed_cross_entropy
                num_classes = tf.shape(self._decoder._logits)[2]
                softmax_loss_fun = smoothed_cross_entropy(
                    num_classes, self._hparams.label_smoothing)
        elif self._hparams.loss_fun == 'focal_loss':
            from .devel import focal_loss
            softmax_loss_fun = focal_loss
        elif self._hparams.loss_fun == 'mc_loss':
            from .devel import mc_loss
            softmax_loss_fun = mc_loss
        else:
            raise ValueError('Unknown loss function {}'.format(
                self._hparams.loss_fun))

        self.batch_loss = seq2seq.sequence_loss(
            logits=self._decoder._logits,
            targets=self._decoder._labels,
            weights=self._loss_weights,
            softmax_loss_function=softmax_loss_fun,
            average_across_batch=True,
            average_across_timesteps=True)

        reg_loss = 0

        if self._hparams.recurrent_l2_regularisation is not None:
            regularisable_vars = _get_trainable_vars(self._hparams.cell_type)
            reg = tf.contrib.layers.l2_regularizer(
                scale=self._hparams.recurrent_l2_regularisation)
            reg_loss = tf.contrib.layers.apply_regularization(
                reg, regularisable_vars)

        if self._hparams.video_processing is not None:
            if 'cnn' in self._hparams.video_processing:
                # we regularise the cnn vars by specifying a regulariser in conv2d
                reg_variables = tf.get_collection(
                    tf.GraphKeys.REGULARIZATION_LOSSES)
                reg_loss += tf.reduce_sum(reg_variables)

        self.batch_loss = self.batch_loss + reg_loss

        if self._hparams.regress_aus is True:
            loss_weight = self._hparams.kwargs.get('au_loss_weight', 10.0)
            self.batch_loss += loss_weight * self._video_encoder.au_loss

        if self._hparams.loss_scaling > 1:
            self.batch_loss *= self._hparams.loss_scaling

        if self._hparams.optimiser == 'Adam':
            optimiser = tf.train.AdamOptimizer(
                learning_rate=self.current_lr,
                epsilon=1e-8 if self._hparams.dtype == tf.float32 else 1e-4,
            )
        elif self._hparams.optimiser == 'Nadam':
            from tensorflow.contrib.opt import NadamOptimizer
            optimiser = NadamOptimizer(learning_rate=self.current_lr)
        elif self._hparams.optimiser == 'AdamW':
            from tensorflow.contrib.opt import AdamWOptimizer
            optimiser = AdamWOptimizer(
                learning_rate=self.current_lr,
                weight_decay=self._hparams.weight_decay,
                epsilon=1e-8 if self._hparams.dtype == tf.float32 else 1e-4,
            )
        elif self._hparams.optimiser == 'Momentum':
            optimiser = tf.train.MomentumOptimizer(
                learning_rate=self.current_lr,
                momentum=0.9,
                use_nesterov=False)
        else:
            raise Exception('Unsupported optimiser, try Adam')

        variables = tf.trainable_variables()
        gradients = tf.gradients(
            self.batch_loss,
            variables)  # not compatible with Nvidia AMP (fp16)
        # gradients = optimiser.compute_gradients(self.batch_loss, variables)

        summaries = []
        for grad, variable in zip(gradients, variables):
            if isinstance(grad, tf.IndexedSlices):
                value = grad.values
            else:
                value = grad
            summary = tf.summary.histogram("%s-grad" % variable.name, value)
            summaries.append(summary)

        if self._hparams.dtype == tf.float16:
            # adapted from https://github.com/joeyearsley/efficient_densenet_tensorflow/blob/master/train.py
            # Choose a loss scale manager which decides how to pick the right loss
            # scale throughout the training process, then wrap the original
            # optimiser in a LossScaleOptimizer so that it is the one applied below.
            loss_scale_manager = ExponentialUpdateLossScaleManager(128, 100)
            optimiser = LossScaleOptimizer(optimiser, loss_scale_manager)

        if self._hparams.loss_scaling > 1:
            gradients = [
                tf.div(grad, self._hparams.loss_scaling) for grad in gradients
            ]

        if self._hparams.clip_gradients is True:
            gradients, self.global_norm = tf.clip_by_global_norm(
                gradients, self._hparams.max_gradient_norm)

        if self._hparams.batch_normalisation is True:
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.train_op = optimiser.apply_gradients(
                    grads_and_vars=zip(gradients, variables),
                    global_step=tf.train.get_global_step())
        else:
            self.train_op = optimiser.apply_gradients(
                grads_and_vars=zip(gradients, variables),
                global_step=tf.train.get_global_step())
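The smoothed_cross_entropy factory imported from .devel is not shown above. A sketch of a compatible implementation, assuming it returns a (labels, logits) callable that one-hot encodes the sparse targets and applies label smoothing; the real helper may be written differently:

import tensorflow as tf

def smoothed_cross_entropy(num_classes, label_smoothing):
    # Hypothetical stand-in for the .devel helper: builds a softmax_loss_function
    # for seq2seq.sequence_loss that applies label smoothing.
    def loss_fun(labels, logits):
        onehot = tf.one_hot(labels, depth=num_classes, dtype=logits.dtype)
        return tf.losses.softmax_cross_entropy(
            onehot_labels=onehot,
            logits=logits,
            label_smoothing=label_smoothing,
            loss_collection=None,                # keep it out of the losses collection
            reduction=tf.losses.Reduction.NONE)  # per-timestep loss, as sequence_loss expects
    return loss_fun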
Example #4
def build_model(encoders):
    """Builds and compiles the model from scratch.

    # Arguments
        encoders: dict of encoders (used to set size of text/categorical inputs)

    # Returns
        model: A compiled model which can be used to train or predict.
    """

    # make
    input_make = Input(shape=(4, ), name="input_make")

    # body
    input_body_size = len(encoders['body_encoder'].classes_)
    input_body = Input(
        shape=(input_body_size if input_body_size != 2 else 1, ),
        name="input_body")

    # mileage
    input_mileage = Input(shape=(4, ), name="input_mileage")

    # engV
    input_engv = Input(shape=(4, ), name="input_engv")

    # engType
    input_engtype_size = len(encoders['engtype_encoder'].classes_)
    input_engtype = Input(
        shape=(input_engtype_size if input_engtype_size != 2 else 1, ),
        name="input_engtype")

    # registration
    input_registration_size = len(encoders['registration_encoder'].classes_)
    input_registration = Input(shape=(input_registration_size if
                                      input_registration_size != 2 else 1, ),
                               name="input_registration")

    # year
    input_year = Input(shape=(4, ), name="input_year")

    # drive
    input_drive_size = len(encoders['drive_encoder'].classes_)
    input_drive = Input(
        shape=(input_drive_size if input_drive_size != 2 else 1, ),
        name="input_drive")

    # Combine all the inputs into a single layer
    concat = concatenate([
        input_make, input_body, input_mileage, input_engv, input_engtype,
        input_registration, input_year, input_drive
    ],
                         name="concat")

    # Multilayer Perceptron (MLP) to find interactions between all inputs
    hidden = Dense(64,
                   activation='selu',
                   name='hidden_1',
                   kernel_regularizer=None)(concat)
    hidden = AlphaDropout(0.5, name="dropout_1")(hidden)

    for i in range(2 - 1):
        hidden = Dense(128,
                       activation="selu",
                       name="hidden_{}".format(i + 2),
                       kernel_regularizer=None)(hidden)
        hidden = AlphaDropout(0.5, name="dropout_{}".format(i + 2))(hidden)
    output = Dense(1, name="output", kernel_regularizer=l2(1e-2))(hidden)

    # Build and compile the model.
    model = Model(inputs=[
        input_make, input_body, input_mileage, input_engv, input_engtype,
        input_registration, input_year, input_drive
    ],
                  outputs=[output])
    model.compile(loss="msle",
                  optimizer=AdamWOptimizer(learning_rate=0.1,
                                           weight_decay=0.025))

    return model
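The build_model snippets in Examples #4-#6 assume that the Keras layers and the contrib AdamW optimiser are imported at module level, roughly along these lines for TF 1.x; the exact import paths in the original projects may differ:

from tensorflow.contrib.opt import AdamWOptimizer
from tensorflow.keras.layers import (AlphaDropout, BatchNormalization, Dense,
                                     Dropout, Input, concatenate)
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2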
Example #5
def build_model(encoders):
    """Builds and compiles the model from scratch.

    # Arguments
        encoders: dict of encoders (used to set size of text/categorical inputs)

    # Returns
        model: A compiled model which can be used to train or predict.
    """

    # Pclass
    input_pclass_size = len(encoders['pclass_encoder'].classes_)
    input_pclass = Input(shape=(
        input_pclass_size if input_pclass_size != 2 else 1,), name="input_pclass")

    # Sex
    input_sex_size = len(encoders['sex_encoder'].classes_)
    input_sex = Input(
        shape=(input_sex_size if input_sex_size != 2 else 1,), name="input_sex")

    # Age
    input_age = Input(shape=(10,), name="input_age")

    # Siblings/Spouses Aboard
    input_siblings_spouses_aboard_size = len(
        encoders['siblings_spouses_aboard_encoder'].classes_)
    input_siblings_spouses_aboard = Input(shape=(
        input_siblings_spouses_aboard_size if input_siblings_spouses_aboard_size != 2 else 1,), name="input_siblings_spouses_aboard")

    # Parents/Children Aboard
    input_parents_children_aboard_size = len(
        encoders['parents_children_aboard_encoder'].classes_)
    input_parents_children_aboard = Input(shape=(
        input_parents_children_aboard_size if input_parents_children_aboard_size != 2 else 1,), name="input_parents_children_aboard")

    # Fare
    input_fare = Input(shape=(10,), name="input_fare")

    # Combine all the inputs into a single layer
    concat = concatenate([
        input_pclass,
        input_sex,
        input_age,
        input_siblings_spouses_aboard,
        input_parents_children_aboard,
        input_fare
    ], name="concat")

    # Multilayer Perceptron (MLP) to find interactions between all inputs
    hidden = Dense(256, activation="relu", name="hidden_1",
                   kernel_regularizer=l2(1e-3))(concat)
    hidden = BatchNormalization(name="bn_1")(hidden)
    hidden = Dropout(0.0, name="dropout_1")(hidden)

    for i in range(2-1):
        hidden = Dense(64, activation="relu", name="hidden_{}".format(
            i+2), kernel_regularizer=l2(1e-3))(hidden)
        hidden = BatchNormalization(name="bn_{}".format(i+2))(hidden)
        hidden = Dropout(0.0, name="dropout_{}".format(i+2))(hidden)

    output = Dense(1, activation="sigmoid", name="output",
                   kernel_regularizer=None)(hidden)

    # Build and compile the model.
    model = Model(inputs=[
        input_pclass,
        input_sex,
        input_age,
        input_siblings_spouses_aboard,
        input_parents_children_aboard,
        input_fare
    ],
        outputs=[output])
    model.compile(loss="binary_crossentropy",
                  optimizer=AdamWOptimizer(learning_rate=0.1,
                                           weight_decay=0.05))

    return model
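The `size if size != 2 else 1` pattern used for the categorical Input widths mirrors the behaviour of sklearn's LabelBinarizer, which emits a single column for binary features and one column per class otherwise. A small illustration with toy values (the feature values below are illustrative):

from sklearn.preprocessing import LabelBinarizer

sex_encoder = LabelBinarizer().fit(['male', 'female', 'male'])
pclass_encoder = LabelBinarizer().fit([1, 2, 3, 1])

# Binary feature: two classes but a single encoded column, hence Input(shape=(1,)).
print(len(sex_encoder.classes_), sex_encoder.transform(['male']).shape)    # 2 (1, 1)
# Multi-class feature: one column per class, hence Input(shape=(3,)).
print(len(pclass_encoder.classes_), pclass_encoder.transform([2]).shape)   # 3 (1, 3)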
Example #6
def build_model(encoders):
    """Builds and compiles the model from scratch.

    # Arguments
        encoders: dict of encoders (used to set size of text/categorical inputs)

    # Returns
        model: A compiled model which can be used to train or predict.
    """

    # Unnamed: 0
    input_unnamed_0 = Input(shape=(10, ), name="input_unnamed_0")

    # Month
    input_month_size = len(encoders['month_encoder'].classes_)
    input_month = Input(
        shape=(input_month_size if input_month_size != 2 else 1, ),
        name="input_month")

    # Year
    input_year_size = len(encoders['year_encoder'].classes_)
    input_year = Input(
        shape=(input_year_size if input_year_size != 2 else 1, ),
        name="input_year")

    # sher
    input_sher = Input(shape=(10, ), name="input_sher")

    # Combine all the inputs into a single layer
    concat = concatenate(
        [input_unnamed_0, input_month, input_year, input_sher], name="concat")

    # Multilayer Perceptron (MLP) to find interactions between all inputs
    hidden = Dense(64,
                   activation="relu",
                   name="hidden_1",
                   kernel_regularizer=None)(concat)
    hidden = BatchNormalization(name="bn_1")(hidden)
    hidden = Dropout(0.5, name="dropout_1")(hidden)

    for i in range(4 - 1):
        hidden = Dense(256,
                       activation="relu",
                       name="hidden_{}".format(i + 2),
                       kernel_regularizer=None)(hidden)
        hidden = BatchNormalization(name="bn_{}".format(i + 2))(hidden)
        hidden = Dropout(0.5, name="dropout_{}".format(i + 2))(hidden)

    output = Dense(encoders['sales_encoder'].classes_.shape[0],
                   activation="softmax",
                   name="output",
                   kernel_regularizer=l2(1e-2))(hidden)

    # Build and compile the model.
    model = Model(
        inputs=[input_unnamed_0, input_month, input_year, input_sher],
        outputs=[output])
    model.compile(loss="categorical_crossentropy",
                  optimizer=AdamWOptimizer(learning_rate=0.0001,
                                           weight_decay=0.025))

    return model
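The softmax output above is sized from encoders['sales_encoder'].classes_, and categorical_crossentropy expects one-hot targets of the same width. A toy sketch of preparing such targets with a LabelBinarizer-style encoder (the label values are illustrative):

from sklearn.preprocessing import LabelBinarizer

sales_encoder = LabelBinarizer()
y_onehot = sales_encoder.fit_transform(['low', 'medium', 'high', 'medium'])

# y_onehot has sales_encoder.classes_.shape[0] columns (3 here), matching the
# number of units in the softmax output layer.
print(sales_encoder.classes_.shape[0], y_onehot.shape)  # 3 (4, 3)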