def prepare_decoder_inputs(
        self,
        combiner_output,
        other_output_features,
        training=None,
        mask=None
):
    """
    Takes the combiner output and the outputs of the other output features
    computed so far and performs:
    - reduction of the combiner output (if needed)
    - concatenation of the outputs of dependent features (if needed)
    - output-specific fully connected layers (if needed)

    :param combiner_output: output tensor of the combiner
    :param other_output_features: output tensors from other features
    :return: tensor
    """
    feature_hidden = combiner_output

    # ================ Reduce Inputs ================
    if self.reduce_input is not None and len(feature_hidden.shape) > 2:
        feature_hidden = reduce_sequence(
            feature_hidden,
            self.reduce_input
        )

    # ================ Concat Dependencies ================
    feature_hidden = self.concat_dependencies(
        feature_hidden,
        other_output_features
    )

    # ================ Output-wise Fully Connected ================
    feature_hidden = self.output_specific_fully_connected(
        feature_hidden,
        training=training,
        mask=mask
    )

    return feature_hidden
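# A minimal, self-contained sketch of the reduce -> concat -> fc flow that
# prepare_decoder_inputs performs, assuming plain TensorFlow. A sum over the
# time axis stands in for reduce_sequence(..., 'sum'); the Dense layer and all
# names below are illustrative only, not part of this module.
def _sketch_prepare_decoder_inputs():
    import tensorflow as tf

    combiner_output = tf.random.normal([4, 7, 16])  # [batch, seq, hidden]
    # reduce: collapse the time axis
    feature_hidden = tf.reduce_sum(combiner_output, axis=1)  # [4, 16]
    # concat a dependent feature's hidden vector
    dependency_hidden = tf.random.normal([4, 8])
    feature_hidden = tf.concat([feature_hidden, dependency_hidden], -1)  # [4, 24]
    # output-specific fully connected layer (hypothetical size)
    fc = tf.keras.layers.Dense(32, activation='relu')
    return fc(feature_hidden)  # [4, 32]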
def prepare_encoder_output_state(self, inputs):

    if 'encoder_output_state' in inputs:
        encoder_output_state = inputs['encoder_output_state']
    else:
        hidden = inputs['hidden']
        if len(hidden.shape) == 3:  # encoder_output is a sequence
            # reduce_sequence returns a [b, h]
            encoder_output_state = reduce_sequence(
                hidden,
                self.reduce_input if self.reduce_input else 'sum'
            )
        elif len(hidden.shape) == 2:
            # this is already a [b, h]
            encoder_output_state = hidden
        else:
            raise ValueError(
                "Only rank 2 or rank 3 encoder outputs are supported"
            )

    # now we have to deal with the fact that the state needs to be a list
    # in the case of an lstm and a tensor otherwise
    if (self.cell_type == 'lstm' and
            isinstance(encoder_output_state, list)):
        if len(encoder_output_state) == 2:
            # this may be a unidirectional lstm or a bidirectional gru / rnn,
            # and there is no way to tell.
            # If it is a unidirectional lstm, pass will work fine;
            # if it is a bidirectional gru / rnn, the output of one of
            # the directions will be treated as the initial c of the lstm,
            # which is weird and may lead to poor performance
            # todo try to find a way to distinguish between these two cases
            pass
        elif len(encoder_output_state) == 4:
            # the encoder was a bidirectional lstm;
            # a good strategy is to average the 2 h and the 2 c vectors
            encoder_output_state = [
                average(
                    [encoder_output_state[0], encoder_output_state[2]]
                ),
                average(
                    [encoder_output_state[1], encoder_output_state[3]]
                )
            ]
        else:
            # no idea how lists of length other than 2 or 4
            # might have originated; we can either raise a ValueError
            # or deal with it by averaging everything
            # raise ValueError(
            #     "encoder_output_state has length different than 2 or 4. "
            #     "Please double-check your encoder"
            # )
            average_state = average(encoder_output_state)
            encoder_output_state = [average_state, average_state]

    elif (self.cell_type == 'lstm' and
          not isinstance(encoder_output_state, list)):
        encoder_output_state = [encoder_output_state, encoder_output_state]

    elif (self.cell_type != 'lstm' and
          isinstance(encoder_output_state, list)):
        # here we have a couple of options:
        # either reuse part of the input encoder state,
        # or just use its output
        if len(encoder_output_state) == 2:
            # using h and ignoring c
            encoder_output_state = encoder_output_state[0]
        elif len(encoder_output_state) == 4:
            # using the average of the hs and ignoring the cs
            encoder_output_state = average(
                [encoder_output_state[0], encoder_output_state[2]]
            )
        else:
            # no idea how lists of length other than 2 or 4
            # might have originated; we can either raise a ValueError
            # or deal with it by averaging everything
            # raise ValueError(
            #     "encoder_output_state has length different than 2 or 4. "
            #     "Please double-check your encoder"
            # )
            encoder_output_state = average(encoder_output_state)

    elif (self.cell_type != 'lstm' and
          not isinstance(encoder_output_state, list)):
        # do nothing, we are good
        pass

    # at this point encoder_output_state is either a [b, h]
    # or a list([b, h], [b, h]) if the decoder cell is an lstm,
    # but h may not be the same as the decoder state size,
    # so we may need to project
    if isinstance(encoder_output_state, list):
        for i in range(len(encoder_output_state)):
            if (encoder_output_state[i].shape[1] !=
                    self.state_size):
                encoder_output_state[i] = self.project(
                    encoder_output_state[i]
                )
    else:
        if encoder_output_state.shape[1] != self.state_size:
            encoder_output_state = self.project(
                encoder_output_state
            )

    return encoder_output_state
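# A standalone sketch of the bidirectional-lstm state handling above, assuming
# plain TensorFlow: the four state tensors [h_fw, c_fw, h_bw, c_bw] are
# averaged pairwise into a single [h, c] pair, then each tensor is projected if
# the encoder size differs from the decoder state size. The Dense projection
# and all names are illustrative only.
def _sketch_bidirectional_lstm_state():
    import tensorflow as tf

    batch, encoder_size, decoder_state_size = 4, 8, 16
    h_fw, c_fw, h_bw, c_bw = [
        tf.random.normal([batch, encoder_size]) for _ in range(4)
    ]
    # average the 2 h and the 2 c vectors, like average([...]) above
    encoder_output_state = [(h_fw + h_bw) / 2, (c_fw + c_bw) / 2]
    # project each state tensor to the decoder state size, since they differ
    project = tf.keras.layers.Dense(decoder_state_size)
    encoder_output_state = [project(s) for s in encoder_output_state]
    return encoder_output_state  # list([4, 16], [4, 16]), a valid lstm state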
def __call__(
        self,
        inputs,  # encoder outputs
        training=None,
        mask=None,
        **kwargs
):
    if (self.main_sequence_feature is None or
            self.main_sequence_feature not in inputs):
        for if_name, if_outputs in inputs.items():
            # todo: when https://github.com/uber/ludwig/issues/810 is closed
            #       convert the following test from using shape to using
            #       explicit if_outputs['type'] values for sequence features
            if len(if_outputs['encoder_output'].shape) == 3:
                self.main_sequence_feature = if_name
                break

    if self.main_sequence_feature is None:
        raise Exception(
            'No sequence feature available for sequence combiner'
        )

    main_sequence_feature_encoding = inputs[self.main_sequence_feature]

    representation = main_sequence_feature_encoding['encoder_output']
    representations = [representation]

    sequence_max_length = representation.shape[1]
    sequence_length = sequence_length_3D(representation)

    # ================ Concat ================
    for if_name, if_outputs in inputs.items():
        if if_name != self.main_sequence_feature:
            if_representation = if_outputs['encoder_output']
            if len(if_representation.shape) == 3:
                # The following check makes sense when
                # both representations have a specified
                # sequence length dimension. If they do not,
                # then this check is simply checking if None == None
                # and will not catch discrepancies in the
                # feature length dimension. Those errors will show up
                # at training time. A possible solution is
                # to enforce a fixed length second dimension in
                # sequential feature placeholders, but that
                # does not work with the BucketedBatcher, which requires
                # the second dimension to be undefined in order to be
                # able to trim the data points and speed up computation.
                # So for now we are keeping things like this; make sure
                # the documentation states that a training time
                # dimension mismatch may occur if the sequential
                # features have different lengths for some data points.
                if if_representation.shape[1] != representation.shape[1]:
                    raise ValueError(
                        'The sequence length of the input feature {} '
                        'is {} and is different from the sequence '
                        'length of the main sequence feature {} which '
                        'is {}.\n Shape of {}: {}, shape of {}: {}.\n'
                        'Sequence lengths of all sequential features '
                        'must be the same in order to be concatenated '
                        'by the sequence concat combiner. '
                        'Try to impose the same max sequence length '
                        'as a preprocessing parameter to both features '
                        'or to reduce the output of {}.'.format(
                            if_name,
                            if_representation.shape[1],
                            self.main_sequence_feature,
                            representation.shape[1],
                            if_name,
                            if_representation.shape,
                            self.main_sequence_feature,
                            representation.shape,
                            if_name
                        )
                    )

                # this assumes all sequence representations have the
                # same sequence length, 2nd dimension
                representations.append(if_representation)

            elif len(if_representation.shape) == 2:
                multipliers = tf.constant([1, sequence_max_length, 1])
                tiled_representation = tf.tile(
                    tf.expand_dims(if_representation, 1),
                    multipliers
                )
                representations.append(tiled_representation)

            else:
                raise ValueError(
                    'The representation of {} has rank {} and cannot be '
                    'concatenated by a sequence concat combiner. '
                    'Only rank 2 and rank 3 tensors are supported.'.format(
                        if_outputs['name'],
                        len(if_representation.shape)
                    )
                )

    hidden = tf.concat(representations, 2)
    logger.debug('  concat_hidden: {0}'.format(hidden))

    # ================ Mask ================
    # todo tf2: maybe use tf2 masking
    sequence_mask = tf.sequence_mask(
        sequence_length,
        sequence_max_length
    )
    hidden = tf.multiply(
        hidden,
        tf.cast(tf.expand_dims(sequence_mask, -1), dtype=tf.float32)
    )

    # ================ Reduce ================
    hidden = reduce_sequence(hidden, self.reduce_output)

    return_data = {'combiner_output': hidden}
    if len(inputs) == 1:
        for key, value in [d for d in inputs.values()][0].items():
            if key != 'encoder_output':
                return_data[key] = value

    return return_data
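# A standalone sketch of the tile-and-mask concatenation that this combiner
# applies to rank-2 features, assuming plain TensorFlow; names and sizes are
# illustrative only.
def _sketch_sequence_concat_combiner():
    import tensorflow as tf

    representation = tf.random.normal([2, 5, 8])  # main sequence [b, s, h]
    if_representation = tf.random.normal([2, 3])  # rank-2 feature [b, h']
    # tile the rank-2 feature along the sequence dimension
    tiled = tf.tile(tf.expand_dims(if_representation, 1), [1, 5, 1])  # [2, 5, 3]
    hidden = tf.concat([representation, tiled], 2)  # [2, 5, 11]
    # zero out timesteps past each example's true length
    sequence_length = tf.constant([5, 3])
    sequence_mask = tf.sequence_mask(sequence_length, 5)
    hidden = tf.multiply(
        hidden,
        tf.cast(tf.expand_dims(sequence_mask, -1), dtype=tf.float32)
    )
    return hidden  # the padded timesteps of the second example are all zeros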
def concat_dependencies(self, hidden, other_features_hidden):
    if len(self.dependencies) > 0:
        dependencies_hidden = []
        for dependency in self.dependencies:
            # the dependent feature is ensured to be present in
            # other_features_hidden because we topologically sorted
            # the features beforehand
            dependency_final_hidden = other_features_hidden[dependency]

            if len(hidden.shape) > 2:
                if len(dependency_final_hidden.shape) > 2:
                    # matrix matrix -> concat
                    assert hidden.shape[1] == \
                           dependency_final_hidden.shape[1]
                    dependencies_hidden.append(dependency_final_hidden)
                else:
                    # matrix vector -> tile concat
                    sequence_max_length = hidden.shape[1]
                    multipliers = tf.concat(
                        [[1], [sequence_max_length], [1]],
                        0
                    )
                    tiled_representation = tf.tile(
                        tf.expand_dims(dependency_final_hidden, 1),
                        multipliers
                    )

                    # todo tf2: maybe modify this with TF2 mask mechanics
                    sequence_length = sequence_length_3D(hidden)
                    mask = tf.sequence_mask(
                        sequence_length,
                        sequence_max_length
                    )
                    tiled_representation = tf.multiply(
                        tiled_representation,
                        tf.cast(mask[:, :, tf.newaxis], dtype=tf.float32)
                    )

                    dependencies_hidden.append(tiled_representation)
            else:
                if len(dependency_final_hidden.shape) > 2:
                    # vector matrix -> reduce concat
                    dependencies_hidden.append(
                        reduce_sequence(
                            dependency_final_hidden,
                            self.reduce_dependencies
                        )
                    )
                else:
                    # vector vector -> concat
                    dependencies_hidden.append(dependency_final_hidden)

        try:
            hidden = tf.concat([hidden] + dependencies_hidden, -1)
        except Exception:
            raise ValueError(
                'Shape mismatch while concatenating dependent features of '
                '{}: {}. Concatenating the feature activations tensor {} '
                'with activation tensors of dependencies: {}. The error is '
                'likely due to a mismatch of the second dimension (sequence '
                'length) or a difference in ranks. Likely solutions are '
                'setting the maximum_sequence_length of all sequential '
                'features to be the same, reducing the output of some '
                'features, or disabling bucketing by setting '
                'bucketing_field to None / null, as activating it will '
                'reduce the length of the field the bucketing is performed '
                'on.'.format(
                    self.feature_name,
                    self.dependencies,
                    hidden,
                    dependencies_hidden
                )
            )

    return hidden
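# A standalone sketch of the vector matrix case above: a rank-3 dependency is
# reduced over its time axis before being concatenated to a rank-2 feature.
# Plain TensorFlow, with a sum standing in for reduce_sequence(..., 'sum');
# all names are illustrative only.
def _sketch_vector_matrix_dependency():
    import tensorflow as tf

    hidden = tf.random.normal([4, 16])               # this feature: [b, h]
    dependency_hidden = tf.random.normal([4, 7, 8])  # dependency: [b, s, h']
    reduced = tf.reduce_sum(dependency_hidden, axis=1)  # [4, 8]
    return tf.concat([hidden, reduced], -1)          # [4, 24]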
def call(
        self,
        inputs,
        training=None,
        mask=None
):
    """
    :param inputs: The input vector fed into the encoder.
           Shape: [batch x 19], type tf.int8
    :type inputs: Tensor
    :param training: bool specifying if in training mode (important for dropout)
    :type training: bool
    :param mask: bool tensor encoding masked timesteps in the input
    :type mask: bool
    """
    input_vector = tf.cast(inputs, tf.int32)

    # ================ Embeddings ================
    embedded_mode = self.embed_mode(
        input_vector[:, 0:1],
        training=training,
        mask=mask
    )
    embedded_edge = self.embed_edge(
        input_vector[:, 1:2],
        training=training,
        mask=mask
    )
    embedded_resolution = self.embed_resolution(
        input_vector[:, 2:3],
        training=training,
        mask=mask
    )
    embedded_base_cell = self.embed_base_cell(
        input_vector[:, 3:4],
        training=training,
        mask=mask
    )
    embedded_cells = self.embed_cells(
        input_vector[:, 4:],
        training=training,
        mask=mask
    )

    # ================ Masking ================
    # only the first `resolution` of the 15 cell digits are valid,
    # so the embeddings of the remaining ones are zeroed out
    # (named cells_mask to avoid shadowing the mask argument)
    resolution = input_vector[:, 2]
    cells_mask = tf.cast(
        tf.expand_dims(tf.sequence_mask(resolution, 15), -1),
        dtype=tf.float32
    )
    masked_embedded_cells = embedded_cells * cells_mask

    # ================ Reduce ================
    concatenated = tf.concat(
        [embedded_mode, embedded_edge, embedded_resolution,
         embedded_base_cell, masked_embedded_cells],
        axis=1
    )
    hidden = reduce_sequence(concatenated, self.reduce_output)

    # ================ FC Stack ================
    # logger.debug('  flatten hidden: {0}'.format(hidden))
    hidden = self.fc_stack(
        hidden,
        training=training,
        mask=mask
    )

    return {'encoder_output': hidden}
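# A standalone sketch of the resolution-based masking above, assuming plain
# TensorFlow. An H3 index carries up to 15 cell digits; only the first
# `resolution` of them are meaningful, so the rest are zeroed out. Sizes and
# names are illustrative only.
def _sketch_h3_cell_masking():
    import tensorflow as tf

    resolution = tf.constant([3, 15])              # valid digits per example
    embedded_cells = tf.random.normal([2, 15, 8])  # [batch, 15 digits, emb]
    cells_mask = tf.cast(
        tf.expand_dims(tf.sequence_mask(resolution, 15), -1),
        dtype=tf.float32
    )
    return embedded_cells * cells_mask  # embeddings of invalid digits are zero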