Code Example #1
File: Text.py Project: jordanosborn/tf2-keras-pandas
    def input_nub_generator(variable, transformed_observations):
        """
        Generate an input layer and input 'nub' for a Keras network.

         - input_layer: The input layer accepts data from the outside world.
         - input_nub: The input nub will always include the input_layer as its first layer. It may also include
         other layers for handling the data type in specific ways

        :param variable: Name of the variable
        :type variable: str
        :param transformed_observations: A dataframe, containing either the specified variable, or derived variables
        :type transformed_observations: pandas.DataFrame
        :return: A tuple containing the input layer, and the last layer of the nub
        """
        logging.info('Creating input nub for: {}'.format(variable))
        # Get transformed data for shaping. One column per token.
        if variable in transformed_observations.columns:
            variable_list = [variable]
        else:
            variable_name_prefix = variable + '_'
            variable_list = list(filter(lambda x: x.startswith(variable_name_prefix), transformed_observations.columns))
        logging.info('Determined variable list: {}'.format(variable_list))

        # Pull transformed data as matrix
        transformed = transformed_observations[variable_list].as_matrix()

        # Determine sequence length
        if len(transformed.shape) >= 2:
            # If we have multiple columns, it's one column per word
            input_sequence_length = int(transformed.shape[1])
        else:
            # If there are not multiple columns, there is only one word
            input_sequence_length = 1

        # Determine vocabulary size (number of rows in the embedding). The +4 offset covers 1 for length vs.
        # 0-based indexing and 1 for the unknown token; the purpose of the remaining 2 is not documented.
        vocab_size = int(numpy.max(transformed)) + 4

        # Determine embedding output size
        # TODO There must be a better heuristic
        embedding_output_dim = 200

        logging.info('Creating embedding for text_var: {}, with input_sequence_length: {}, vocab size: {}, '
                     'and embedding_output_dim: {}'.format(variable, input_sequence_length, vocab_size,
                                                           embedding_output_dim))

        # Create and stack layers
        input_layer = keras.Input(shape=(input_sequence_length,),
                                  name=lib.namespace_conversion('input_{}'.format(variable)))
        x = input_layer
        x = Embedding(input_dim=vocab_size, output_dim=embedding_output_dim, input_length=input_sequence_length,
                      name=lib.namespace_conversion('embedding_{}'.format(variable)))(x)
        x = Bidirectional(LSTM(128,
                               name=lib.namespace_conversion('lstm_{}'.format(variable))),
                          name=lib.namespace_conversion('bidirectional_lstm_{}'.format(variable)))(x)

        input_nub = x

        # Return
        return input_layer, input_nub
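As a quick check of the shaping logic above, the following self-contained sketch reproduces the column selection, sequence-length, and vocabulary-size arithmetic on a toy DataFrame. The column names and token indices are invented for illustration, and .values stands in for the deprecated .as_matrix():

import numpy
import pandas

# Toy tokenized text: one column per token position for the variable 'review'
observations = pandas.DataFrame({
    'review_0': [1, 4, 7],
    'review_1': [2, 5, 8],
    'review_2': [3, 6, 9],
})

variable = 'review'
prefix = variable + '_'
variable_list = [c for c in observations.columns if c.startswith(prefix)]

transformed = observations[variable_list].values
input_sequence_length = transformed.shape[1] if transformed.ndim >= 2 else 1
vocab_size = int(numpy.max(transformed)) + 4

print(variable_list)          # ['review_0', 'review_1', 'review_2']
print(input_sequence_length)  # 3
print(vocab_size)             # 13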
Code Example #2
File: testLib.py Project: welkinland/keras-pandas
    def test_namespace_conversion(self):
        placeholder(name=lib.namespace_conversion(' asdf @$@#$@#'))
        placeholder(name=lib.namespace_conversion('asdf @$ @#$@#'))
        placeholder(name=lib.namespace_conversion('12342342'))
        iris_vars = [
            'sepal length in cm', 'sepal width in cm', 'petal length in cm',
            'petal width in cm', 'class', 'Iris Setosa', 'Iris Versicolour',
            'Iris Virginica'
        ]
        for var in iris_vars:
            placeholder(name=lib.namespace_conversion(var))
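lib.namespace_conversion itself is not shown in these examples. As an illustration only, a name-sanitizing helper along the following lines would accept the inputs exercised by this test; this is an assumed stand-in, not the library's actual implementation:

import re

def namespace_conversion(name):
    # Hypothetical stand-in: collapse runs of characters that are not
    # letters, digits, or underscores into single underscores, trim the
    # ends, and lower-case the result so it can be used as a layer name.
    return re.sub(r'[^0-9a-zA-Z_]+', '_', str(name)).strip('_').lower()

print(namespace_conversion('sepal length in cm'))  # sepal_length_in_cm
print(namespace_conversion(' asdf @$@#$@#'))       # asdf
print(namespace_conversion('12342342'))            # 12342342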
Code Example #3
File: TimeSeries.py Project: gissong/keras-pandas
    def input_nub_generator(variable, transformed_observations):
        """
        Generate an input layer and input 'nub' for a Keras network.

         - input_layer: The input layer accepts data from the outside world.
         - input_nub: The input nub will always include the input_layer as its first layer. It may also include
         other layers for handling the data type in specific ways

        :param variable: Name of the variable
        :type variable: str
        :param transformed_observations: A dataframe, containing either the specified variable, or derived variables
        :type transformed_observations: pandas.DataFrame
        :return: A tuple containing the input layer, and the last layer of the nub
        """

        # Get transformed data for shaping
        if variable in transformed_observations.columns:
            variable_list = [variable]
        else:
            variable_name_prefix = variable + '_'
            variable_list = list(
                filter(lambda x: x.startswith(variable_name_prefix),
                       transformed_observations.columns))
        transformed = transformed_observations[variable_list].as_matrix()

        # Set up sequence length for input_layer
        if len(transformed.shape) >= 2:
            input_sequence_length = int(transformed.shape[1])
        else:
            input_sequence_length = 1
        logging.info(
            'For variable: {}, using input_sequence_length: {}'.format(
                variable, input_sequence_length))

        # Create and stack layers
        input_layer = keras.Input(shape=(input_sequence_length, ),
                                  name=lib.namespace_conversion(
                                      'input_{}'.format(variable)))
        x = input_layer
        x = Reshape((input_sequence_length, 1))(x)
        x = Bidirectional(LSTM(32,
                               name=lib.namespace_conversion(
                                   'lstm_{}'.format(variable))),
                          name=lib.namespace_conversion(
                              'bidirectional_lstm_{}'.format(variable)))(x)

        input_nub = x

        return input_layer, input_nub
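The only structural difference from the text nub is the Reshape, which turns a flat row of lagged values into a (timesteps, 1) sequence before the bidirectional LSTM. Below is a minimal standalone sketch of the same layer stack, assuming TensorFlow's bundled Keras; the layer names are placeholders rather than lib.namespace_conversion output:

from tensorflow import keras
from tensorflow.keras.layers import Reshape, Bidirectional, LSTM

input_sequence_length = 12  # e.g. twelve lagged observations per row

input_layer = keras.Input(shape=(input_sequence_length,), name='input_demo_series')
x = Reshape((input_sequence_length, 1))(input_layer)  # (batch, 12) -> (batch, 12, 1)
x = Bidirectional(LSTM(32), name='bidirectional_lstm_demo_series')(x)

print(x.shape)  # (None, 64): 32 units per direction, concatenated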
Code Example #4
    def input_nub_generator(variable, transformed_observations):
        """
        Generate an input layer and input 'nub' for a Keras network.

         - input_layer: The input layer accepts data from the outside world.
         - input_nub: The input nub will always include the input_layer as its first layer. It may also include
         other layers for handling the data type in specific ways

        :param variable: Name of the variable
        :type variable: str
        :param transformed_observations: A dataframe, containing either the specified variable, or derived variables
        :type transformed_observations: pandas.DataFrame
        :return: A tuple containing the input layer, and the last layer of the nub
        """
        # Get transformed data for shaping
        transformed = transformed_observations[variable].as_matrix()

        # Set up dimensions for input_layer layer
        if len(transformed.shape) >= 2:
            input_sequence_length = int(transformed.shape[1])
        else:
            input_sequence_length = 1

        # Use numpy.max so this also works when transformed has more than one column
        categorical_num_levels = int(numpy.max(transformed)) + 2
        embedding_output_dim = int(min((categorical_num_levels + 1) / 2, 50))

        logging.info(
            'Creating embedding for cat_var: {}, with input_sequence_length: {}, categorical_num_levels: {}, '
            'and embedding_output_dim: {}'.format(variable,
                                                  input_sequence_length,
                                                  categorical_num_levels,
                                                  embedding_output_dim))

        input_layer = keras.Input(shape=(input_sequence_length, ),
                                  name=lib.namespace_conversion(
                                      'input_{}'.format(variable)))
        x = input_layer
        x = Embedding(input_dim=categorical_num_levels,
                      output_dim=embedding_output_dim,
                      input_length=input_sequence_length,
                      name=lib.namespace_conversion(
                          'embedding_{}'.format(variable)))(x)
        x = Flatten(name=lib.namespace_conversion(
            'flatten_embedding_{}'.format(variable)))(x)

        input_nub = x

        return input_layer, input_nub
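The embedding width follows a simple rule of thumb: roughly half the number of category levels, capped at 50. A quick check of that arithmetic (the helper function name is invented for illustration):

def embedding_dims(max_index):
    # Mirrors the arithmetic above: +2 covers 0-based indexing and an
    # unknown-token row; the embedding width is capped at 50.
    categorical_num_levels = int(max_index) + 2
    embedding_output_dim = int(min((categorical_num_levels + 1) / 2, 50))
    return categorical_num_levels, embedding_output_dim

print(embedding_dims(5))    # (7, 4): small categorical, narrow embedding
print(embedding_dims(500))  # (502, 50): high cardinality hits the 50 cap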
Code Example #5
File: Boolean.py Project: welkinland/keras-pandas
    def input_nub_generator(self, variable, transformed_observations):
        """
        Generate an input layer and input 'nub' for a Keras network.

         - input_layer: The input layer accepts data from the outside world.
         - input_nub: The input nub will always include the input_layer as its first layer. It may also include
         other layers for handling the data type in specific ways

        :param variable: Name of the variable
        :type variable: str
        :param transformed_observations: A dataframe, containing either the specified variable, or derived variables
        :type transformed_observations: pandas.DataFrame
        :return: A tuple containing the input layer, and the last layer of the nub
        """

        transformed = transformed_observations[variable].as_matrix()

        # Set up dimensions for input_layer layer
        if len(transformed.shape) >= 2:
            input_sequence_length = int(transformed.shape[1])
        else:
            input_sequence_length = 1

        # Create input_layer layer
        input_layer = keras.Input(shape=(input_sequence_length,),
                                  name=lib.namespace_conversion('input_{}'.format(variable)))
        input_nub = input_layer

        # Return, in format of input_layer, last variable-specific layer
        return input_layer, input_nub
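For booleans the nub is a pure pass-through: the input layer and the last layer of the nub are the same tensor. Per-variable nubs like these are typically concatenated into a single downstream network; the sketch below shows that general pattern with invented variable names and is not keras-pandas's own model-assembly code:

from tensorflow import keras
from tensorflow.keras.layers import Concatenate, Dense

# Two toy nubs: a pass-through boolean input and a three-wide numeric input
bool_input = keras.Input(shape=(1,), name='input_is_member')
bool_nub = bool_input  # Boolean nub: no extra layers

numeric_input = keras.Input(shape=(3,), name='input_measurements')
numeric_nub = numeric_input

x = Concatenate()([bool_nub, numeric_nub])
output = Dense(1, activation='sigmoid')(x)
model = keras.Model(inputs=[bool_input, numeric_input], outputs=output)
model.summary()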