Example #1
    def _get_outputs(self, inputs, seq_lengths):
        """compute the evaluation logits for a batch of data

		Args:
			inputs: the inputs to the neural network, this is a dictionary of
				[batch_size x ...] tensors
			seq_length: The sequence lengths of the input utterances, this
				is a list of [batch_size] vectors

		Returns:
			the logits"""

        with tf.name_scope('evaluate_logits'):
            logits, _ = run_multi_model.run_multi_model(
                models=self.models,
                model_nodes=self.model_nodes,
                model_links=self.model_links,
                inputs=inputs,
                inputs_links=self.inputs_links,
                nodes_output_names=self.nodes_output_names,
                output_names=self.output_names,
                seq_lengths=seq_lengths,
                is_training=False)

        return logits
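
This method builds the evaluation graph under a dedicated name scope and runs the shared models with is_training=False so that training-only behaviour such as dropout is disabled. Below is a minimal, self-contained TF 1.x sketch of that idea; run_model, the layer sizes and the placeholder are illustrative stand-ins, not the project's run_multi_model.

import tensorflow as tf

def run_model(inputs, is_training):
    # a toy stand-in for run_multi_model.run_multi_model: dropout is only
    # active during training, which is why the evaluation graph is built
    # with is_training=False
    hidden = tf.layers.dense(inputs, 16, activation=tf.nn.relu)
    hidden = tf.layers.dropout(hidden, rate=0.5, training=is_training)
    return tf.layers.dense(hidden, 4)      # the logits

inputs = tf.placeholder(tf.float32, shape=[None, 8])

# evaluation logits, built under a dedicated name scope as in _get_outputs
with tf.name_scope('evaluate_logits'):
    logits = run_model(inputs, is_training=False)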
Example #2
    def gather_grads(self, optimizer):
        """ Gather gradients for this task"""

        with tf.variable_scope(self.task_name):

            # a variable to hold the batch loss
            self.batch_loss = tf.get_variable(
                name='batch_loss',
                shape=[],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0),
                trainable=False)

            # a variable to hold the batch loss norm
            self.batch_loss_norm = tf.get_variable(
                name='batch_loss_norm',
                shape=[],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0),
                trainable=False)

            # normalize the loss
            with tf.variable_scope('normalize_loss'):
                self.normalized_loss = self.batch_loss / self.batch_loss_norm

            self.process_minibatch = []
            for set_ind, linkedset in enumerate(self.linkedsets):

                inputs = dict()
                seq_lengths = dict()
                targets = dict()

                # create the input pipeline
                data, seq_length = input_pipeline.input_pipeline(
                    data_queue=self.data_queue[linkedset],
                    batch_size=self.batch_size,
                    numbuckets=int(self.trainerconf['numbuckets']),
                    dataconfs=self.input_dataconfs[linkedset] +
                    self.target_dataconfs[linkedset])

                # split data into inputs and targets
                for ind, input_name in enumerate(self.input_names):
                    inputs[input_name] = data[ind]
                    seq_lengths[input_name] = seq_length[ind]

                for ind, target_name in enumerate(self.target_names):
                    targets[target_name] = data[len(self.input_names) + ind]

                # get the logits
                logits, used_models = run_multi_model.run_multi_model(
                    models=self.models,
                    model_nodes=self.model_nodes,
                    model_links=self.model_links,
                    inputs=inputs,
                    inputs_links=self.inputs_links,
                    nodes_output_names=self.nodes_output_names,
                    output_names=self.output_names,
                    seq_lengths=seq_lengths,
                    is_training=True)

                # compute the loss
                task_minibatch_loss, task_minibatch_loss_norm = self.loss_computer(
                    targets, logits, seq_lengths)
                task_minibatch_loss *= self.linkedset_weighting[linkedset]
                task_minibatch_loss_norm *= self.linkedset_weighting[linkedset]

                used_variables = run_multi_model.get_variables(used_models)
                task_minibatch_grads_and_vars = optimizer.compute_gradients(
                    task_minibatch_loss, var_list=used_variables)

                (task_minibatch_grads,
                 task_vars) = zip(*task_minibatch_grads_and_vars)

                if set_ind == 0:
                    # # This should have already been done before the loop, but then the trainable parameters were unknown
                    # # gather all trainable parameters
                    # self.params = tf.trainable_variables()

                    self.params = task_vars

                    # a variable to hold all the gradients
                    self.grads = [
                        tf.get_variable(param.op.name,
                                        param.get_shape().as_list(),
                                        initializer=tf.constant_initializer(0),
                                        trainable=False)
                        for param in self.params
                    ]

                # update the batch gradients with the minibatch gradients.
                # If a minibatch gradient is None, the loss does not depend on the
                # specific variable(s) and it will thus not be updated
                with tf.variable_scope('update_gradients_%s' % linkedset):
                    update_gradients = [
                        grad.assign_add(batchgrad) for batchgrad, grad in zip(
                            task_minibatch_grads, self.grads)
                        if batchgrad is not None
                    ]

                acc_loss = self.batch_loss.assign_add(task_minibatch_loss)
                acc_loss_norm = self.batch_loss_norm.assign_add(
                    task_minibatch_loss_norm)

                # group all the operations together that need to be executed to process
                # a minibatch
                self.process_minibatch.append(
                    tf.group(*(update_gradients + [acc_loss] +
                               [acc_loss_norm]),
                             name='update_grads_loss_norm_%s' % linkedset))

            reset_batch_loss = self.batch_loss.assign(0.0)

            reset_batch_loss_norm = self.batch_loss_norm.assign(0.0)

            reset_grad = tf.variables_initializer(self.grads)

            # normalize the gradients if requested.
            with tf.variable_scope('normalize_gradients'):
                if self.trainerconf['normalize_gradients'] == 'True':
                    self.normalize_gradients = [
                        grad.assign(tf.divide(grad, self.batch_loss_norm))
                        for grad in self.grads
                    ]
                else:
                    self.normalize_gradients = [
                        grad.assign(grad) for grad in self.grads
                    ]

            # an op to reset the grads, the loss and the loss norm
            self.reset_grad_loss_norm = tf.group(
                *([reset_grad, reset_batch_loss, reset_batch_loss_norm]),
                name='reset_grad_loss_norm')

            batch_grads_and_vars = zip(self.grads, task_vars)

            with tf.variable_scope('clip'):
                clip_value = float(self.trainerconf['clip_grad_value'])
                # clip the gradients
                batch_grads_and_vars = [
                    (tf.clip_by_value(grad, -clip_value, clip_value), var)
                    for grad, var in batch_grads_and_vars
                ]

        return batch_grads_and_vars
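
The core of gather_grads is a gradient accumulation scheme: a non-trainable buffer variable is created for every trainable parameter, each minibatch's gradients are added into the buffers with assign_add, and the buffers are later applied and reset. Below is a minimal, self-contained TF 1.x sketch of that scheme with a toy one-parameter loss; the names, the learning rate and the loop count are illustrative, not taken from the source.

import tensorflow as tf

# a toy model: a single trainable weight and a quadratic loss
w = tf.get_variable('w', shape=[], initializer=tf.constant_initializer(5.0))
loss = tf.square(w - 1.0)

optimizer = tf.train.AdamOptimizer(0.1)
params = tf.trainable_variables()

# one non-trainable accumulator per trainable parameter
grads = [
    tf.get_variable(param.op.name + '_acc',
                    param.get_shape().as_list(),
                    initializer=tf.constant_initializer(0),
                    trainable=False)
    for param in params]

# gradients for one minibatch
grads_and_vars = optimizer.compute_gradients(loss, var_list=params)
minibatch_grads, variables = zip(*grads_and_vars)

# add the minibatch gradients into the accumulators,
# skipping gradients that are None
accumulate = tf.group(*[
    acc.assign_add(grad)
    for grad, acc in zip(minibatch_grads, grads)
    if grad is not None])

# apply the accumulated gradients and reset the accumulators
apply_grads = optimizer.apply_gradients(list(zip(grads, variables)))
reset_grads = tf.variables_initializer(grads)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(4):                 # accumulate over 4 minibatches
        sess.run(accumulate)
    sess.run(apply_grads)
    sess.run(reset_grads)

In the original trainer the same buffers are additionally normalized by the accumulated loss norm and clipped before apply_gradients, as the next example shows.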
Example #3
    def train(self, learning_rate):
        '''Set the training ops for this task.'''

        with tf.variable_scope(self.task_name):

            #create the optimizer
            optimizer = tf.train.AdamOptimizer(learning_rate)

            inputs = dict()
            seq_lengths = dict()
            targets = dict()

            for linkedset in self.linkedsets:
                #create the input pipeline
                data, seq_length = input_pipeline.input_pipeline(
                    data_queue=self.data_queue[linkedset],
                    batch_size=self.batch_size,
                    numbuckets=int(self.trainerconf['numbuckets']),
                    dataconfs=self.input_dataconfs[linkedset] +
                    self.target_dataconfs[linkedset])

                #split data into inputs and targets
                for ind, input_name in enumerate(
                        self.linkedsets[linkedset]['inputs']):
                    inputs[input_name] = data[ind]
                    seq_lengths[input_name] = seq_length[ind]

                for ind, target_name in enumerate(
                        self.linkedsets[linkedset]['targets']):
                    targets[target_name] = data[
                        len(self.linkedsets[linkedset]['inputs']) + ind]

            #get the logits
            logits = run_multi_model.run_multi_model(
                models=self.models,
                model_nodes=self.model_nodes,
                model_links=self.model_links,
                inputs=inputs,
                inputs_links=self.inputs_links,
                output_names=self.output_names,
                seq_lengths=seq_lengths,
                is_training=True)

            #TODO: The proper way to exploit data parallelism is via the
            #SyncReplicasOptimizer defined below. However, for some reason it hangs
            #and I have not yet found a solution for it. For the moment the gradients
            #are accumulated in a way that does not allow data parallelism and there
            #is no advantage in having multiple workers.

            #create an optimizer that aggregates gradients
            #if int(conf['numbatches_to_aggregate']) > 0:
            #optimizer = tf.train.SyncReplicasOptimizer(
            #opt=optimizer,
            #replicas_to_aggregate=int(
            #conf['numbatches_to_aggregate'])#,
            ##total_num_replicas=num_replicas
            #)

            #a variable to hold the batch loss
            self.batch_loss = tf.get_variable(
                name='batch_loss',
                shape=[],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0),
                trainable=False)

            reset_batch_loss = self.batch_loss.assign(0.0)

            #a variable to hold the batch loss norm
            self.batch_loss_norm = tf.get_variable(
                name='batch_loss_norm',
                shape=[],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0),
                trainable=False)

            reset_batch_loss_norm = self.batch_loss_norm.assign(0.0)

            #gather all trainable parameters
            self.params = tf.trainable_variables()

            #a variable to hold all the gradients
            self.grads = [
                tf.get_variable(param.op.name,
                                param.get_shape().as_list(),
                                initializer=tf.constant_initializer(0),
                                trainable=False) for param in self.params
            ]

            reset_grad = tf.variables_initializer(self.grads)

            #compute the loss
            task_minibatch_loss, task_minibatch_loss_norm = self.loss_computer(
                targets, logits, seq_lengths)

            task_minibatch_grads_and_vars = optimizer.compute_gradients(
                task_minibatch_loss)

            (task_minibatch_grads,
             task_vars) = zip(*task_minibatch_grads_and_vars)

            #update the batch gradients with the minibatch gradients.
            #If a minibatch gradient is None, the loss does not depend on the
            #specific variable(s) and it will thus not be updated
            with tf.variable_scope('update_gradients'):
                update_gradients = [
                    grad.assign_add(batchgrad) for batchgrad, grad in zip(
                        task_minibatch_grads, self.grads)
                    if batchgrad is not None
                ]

            acc_loss = self.batch_loss.assign_add(task_minibatch_loss)
            acc_loss_norm = self.batch_loss_norm.assign_add(
                task_minibatch_loss_norm)

            #group all the operations together that need to be executed to process
            #a minibatch
            self.process_minibatch = tf.group(*(update_gradients + [acc_loss] +
                                                [acc_loss_norm]),
                                              name='update_grads_loss_norm')

            #an op to reset the grads, the loss and the loss norm
            self.reset_grad_loss_norm = tf.group(
                *([reset_grad, reset_batch_loss, reset_batch_loss_norm]),
                name='reset_grad_loss_norm')

            #normalize the loss
            with tf.variable_scope('normalize_loss'):
                self.normalized_loss = self.batch_loss / self.batch_loss_norm

            #normalize the gradients if requested.
            with tf.variable_scope('normalize_gradients'):
                if self.trainerconf['normalize_gradients'] == 'True':
                    self.normalize_gradients = [
                        grad.assign(tf.divide(grad, self.batch_loss_norm))
                        for grad in self.grads
                    ]
                else:
                    self.normalize_gradients = [
                        grad.assign(grad) for grad in self.grads
                    ]

            batch_grads_and_vars = zip(self.grads, task_vars)

            with tf.variable_scope('clip'):
                clip_value = float(self.trainerconf['clip_grad_value'])
                #clip the gradients
                batch_grads_and_vars = [
                    (tf.clip_by_value(grad, -clip_value, clip_value), var)
                    for grad, var in batch_grads_and_vars
                ]

            #an op to apply the accumulated gradients to the variables
            self.apply_gradients = optimizer.apply_gradients(
                grads_and_vars=batch_grads_and_vars, name='apply_gradients')
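
The normalize and clip steps at the end of train can be isolated in the same way: the accumulated gradients are divided by the accumulated loss norm and clipped element-wise with tf.clip_by_value before being handed to apply_gradients. Below is a minimal, self-contained sketch; the shapes, values and clip_value are made up for illustration.

import tensorflow as tf

# illustrative accumulators: a gradient buffer and the accumulated loss norm
grad = tf.get_variable(
    'grad_acc', shape=[3],
    initializer=tf.constant_initializer([3.0, -6.0, 9.0]),
    trainable=False)
loss_norm = tf.get_variable(
    'loss_norm', shape=[],
    initializer=tf.constant_initializer(3.0),
    trainable=False)

# normalize: divide the accumulated gradient by the accumulated loss norm
normalize = grad.assign(tf.divide(grad, loss_norm))

# clip the (normalized) gradient element-wise before applying it
clip_value = 1.5
clipped = tf.clip_by_value(grad, -clip_value, clip_value)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(normalize)          # grad is now [1.0, -2.0, 3.0]
    print(sess.run(clipped))     # [ 1.  -1.5  1.5]

In the trainer itself, a driver loop (not shown here) would presumably run process_minibatch once per minibatch, then normalize_gradients, then apply_gradients, and finally reset_grad_loss_norm before the next batch.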