예제 #1
0
    def __init__(self, config: dict, sequence: bool):
        """ Parses a feature definition from the configuration file.

            Args:
                config: Parsed JSON object for this feature
                sequence: True if this is an item-sequence feature, False if it
                    is a transaction-level feature
        """

        # Feature name (mandatory: read_setting raises the given exception if missing)
        self.name:str = read_setting(config, 'name', str, Exception("'name' property expected"))

        # Embedding dimension. If == 0, the feature will not be embedded
        self.embedding_dim:int = read_setting(config, 'embedding_dim', int, 0)

        # Max. number of labels to use as input / to predict. All labels if == 0
        self.max_labels = read_setting(config, 'max_labels', int, 0)

        # True if this feature belongs to the items sequence
        self.sequence = sequence

        # Label values (populated later; None until then)
        self.labels: Labels = None

        # Embedding layer for this feature (created later; None until then)
        self.embedding_layer: tf.keras.layers.Embedding = None
예제 #2
0
def create_rnn(encoded_inputs):
    """ Creates the recurrent (GRU) branch of the model.

        Args:
            encoded_inputs: Encoded sequence inputs tensor to feed the RNN stack
        Returns:
            Output tensor of the last RNN (and optional dropout) layer
    """
    # Model settings
    layer_size = read_setting(settings.model_config, 'rnn_layer_size', int,
                              128)
    dropout_ratio = read_setting(settings.model_config, 'rnn_dropout_ratio',
                                 float, 0.2)
    bidirectional = read_setting(settings.model_config, 'rnn_bidirectional',
                                 bool, True)
    rnn_n_layers = read_setting(settings.model_config, 'rnn_n_layers', int, 1)

    # Define layers
    x = encoded_inputs
    for i in range(rnn_n_layers):
        # Intermediate layers must emit the full sequence so the next RNN
        # layer can consume it; only the last layer emits a single vector
        return_sequences = (i < (rnn_n_layers - 1))
        rnn_layer = tf.keras.layers.GRU(layer_size,
                                        name="rnn_" + str(i),
                                        return_sequences=return_sequences)
        if bidirectional:
            # BUGFIX: the wrapper name must be unique per layer. The previous
            # constant name "rnn_bidir" raised a duplicate-layer-name error
            # whenever rnn_n_layers > 1
            rnn_layer = tf.keras.layers.Bidirectional(rnn_layer,
                                                      name="rnn_bidir_" +
                                                      str(i))
        x = rnn_layer(x)

        # Optional dropout after each RNN layer (disabled when ratio == 0)
        if dropout_ratio > 0:
            x = tf.keras.layers.Dropout(dropout_ratio,
                                        name="dropout_" + str(i))(x)

    return x
예제 #3
0
def create_conv(encoded_inputs):
    """ Creates the convolutional branch of the model.

        Args:
            encoded_inputs: Encoded sequence inputs to run the convolutions over
        Returns:
            Flattened output tensor of the convolution stack
    """
    # Read convolution hyper-parameters from the model configuration
    n_layers = read_setting(settings.model_config, 'conv_n_layers', int, 2)
    layer_size = read_setting(settings.model_config, 'conv_layer_size', int,
                              128)
    kernel_size = read_setting(settings.model_config, 'conv_kernel_size', int,
                               4)
    strides = read_setting(settings.model_config, 'conv_strides', int, 1)
    pool_size = read_setting(settings.model_config, 'conv_pool_size', int, 0)

    # Stack the convolution layers
    outputs = encoded_inputs
    for layer_idx in range(n_layers):
        conv_layer = tf.keras.layers.Conv1D(layer_size,
                                            kernel_size,
                                            strides=strides,
                                            activation='relu',
                                            name="conv_" + str(layer_idx))
        outputs = conv_layer(outputs)
        # Optional max-pooling after each convolution (disabled when == 0)
        if pool_size > 0:
            outputs = tf.keras.layers.MaxPooling1D(pool_size)(outputs)

    # Flatten convolution outputs to a single vector per sample
    return tf.keras.layers.Flatten()(outputs)
예제 #4
0
def create_dense(encoded_inputs):
    """ Creates the feed-forward (dense) part of the model.

        Args:
            encoded_inputs: Input tensor for the dense stack
        Returns:
            Output tensor of the last dense layer
    """
    # Read dense stack hyper-parameters from the model configuration
    n_layers = read_setting(settings.model_config, 'dense_n_layers', int, 2)
    layer_size = read_setting(settings.model_config, 'dense_layer_size', int,
                              128)
    activation = read_setting(settings.model_config, 'dense_activation', str,
                              'relu')

    # Stack the dense layers
    outputs = encoded_inputs
    for layer_idx in range(n_layers):
        dense_layer = tf.keras.layers.Dense(layer_size,
                                            name="dnn_" + str(layer_idx),
                                            activation=activation)
        outputs = dense_layer(outputs)

    return outputs
예제 #5
0
def create_ensemble_model(inputs: ModelInputs,
                          rating_model: bool) -> tf.keras.Model:
    """ Creates a model combining a RNN branch and a convolutional branch.

        Args:
            inputs: The model inputs definition
            rating_model: True to build a rating-model output layer, False for
                a candidates-model output layer
        Returns:
            The assembled (uncompiled) Keras model
    """
    # Encode all inputs as a single sequence tensor
    encoded_inputs = inputs.get_all_as_sequence()

    # Run both branches over the same encoded inputs
    rnn_output = create_rnn(encoded_inputs)
    conv_output = create_conv(encoded_inputs)

    # Merge both branch results into a single tensor
    merged = tf.keras.layers.Concatenate()([rnn_output, conv_output])

    # Optional dense layer that "ensembles" both results (disabled when == 0)
    ensemble_layer_size = read_setting(settings.model_config,
                                       'ensemble_layer_size', int, 512)
    if ensemble_layer_size > 0:
        merged = tf.keras.layers.Dense(ensemble_layer_size,
                                       activation='relu')(merged)

    # Output layer
    outputs = dense_model.create_output_layer(inputs, merged, rating_model)

    return tf.keras.Model(inputs=inputs.inputs, outputs=outputs)
예제 #6
0
    def __init__(self):
        """ Loads the application settings.

            Resolution order for the configuration file path: command line
            option, then the MARKETBASKET_CONFIG_FILE_PATH environment
            variable, then 'data/config.json' if it exists. If no file is
            found, defaults are used for every setting.
        """

        # Read command line options
        cmd_line_options = self._parse_cmd_line()

        # Get configuration file location
        if cmd_line_options.configfile is None and 'MARKETBASKET_CONFIG_FILE_PATH' in os.environ:
            # Not specified in command line. Get from environment variable
            cmd_line_options.configfile = os.environ[
                'MARKETBASKET_CONFIG_FILE_PATH']
        if cmd_line_options.configfile is None and os.path.exists(
                'data/config.json'):
            # Use this as default
            cmd_line_options.configfile = 'data/config.json'

        # Load config file (empty settings if no file was found)
        if cmd_line_options.configfile is not None:
            settings_json = self._load_config_file(cmd_line_options.configfile)
        else:
            settings_json = {}

        # Setup configuration

        # Configuration file source (may be None)
        self.config_file_path = cmd_line_options.configfile

        # Max number of items to handle
        self.n_max_items = read_setting(settings_json, 'n_max_items', int, 100)

        # Max number customers to handle. If zero, customer code will not be trained
        self.n_max_customers = read_setting(settings_json, 'n_max_customers',
                                            int, 100)

        # Ratio (1 = 100%) of samples to use for evaluation
        self.evaluation_ratio = read_setting(settings_json, 'evaluation_ratio',
                                             float, 0.15)

        # Batch size
        self.batch_size = read_setting(settings_json, 'batch_size', int, 64)

        # Epochs to train
        self.n_epochs = read_setting(settings_json, 'n_epochs', int, 15)

        # Use class weights to correct labels imbalance?
        self.class_weight = read_setting(settings_json, 'class_weight', bool,
                                         False)

        # Model type
        self.model_type = read_setting(settings_json, 'model_type', ModelType,
                                       "convolutional")

        # Sequence length
        self.sequence_length = read_setting(settings_json, 'sequence_length',
                                            int, 16)

        # Sequence - Items embedding dimension
        # TODO: Remove this
        self.items_embedding_dim = read_setting(settings_json,
                                                'items_embedding_dim', int,
                                                128)

        # Sequence - Customers embedding dimension
        # TODO: Remove this
        # BUGFIX: this setting was read twice (the second, redundant read has
        # been removed)
        self.customers_embedding_dim = read_setting(settings_json,
                                                    'customers_embedding_dim',
                                                    int, 64)

        # Transactions file path
        self.transactions_file = read_setting(settings_json,
                                              'transactions_file', str,
                                              'data/transactions.csv')

        # Candidates model generation directory
        self.model_dir = read_setting(settings_json, 'candidates_model_dir',
                                      str, 'models/candidates_model')

        # Rating model generation directory
        self.rating_model_dir = read_setting(settings_json, 'rating_model_dir',
                                             str, 'models/rating_model')

        # Number of candidates to rate for rating model
        self.n_candidates = read_setting(settings_json, 'n_candidates', int,
                                         32)

        # Train verbose log level
        self.train_log_level = cmd_line_options.trainlog

        # TODO: Is not working...
        # Log level for TF core (C++). This MUST to be executed before import tf
        # See https://stackoverflow.com/questions/35869137/avoid-tensorflow-print-on-standard-error
        # See https://github.com/tensorflow/tensorflow/issues/31870
        self.tf_log_level = read_setting(settings_json, 'tf_log_level', str,
                                         'WARNING')
        if self.tf_log_level == 'WARNING':
            os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
        elif self.tf_log_level == 'ERROR':
            os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

        # Read features configuration ('features' key is mandatory: raises
        # KeyError if missing)
        self.features = features_set.FeaturesSet(settings_json['features'])

        # Model configuration
        self.model_config: Dict = read_setting(settings_json, 'model_config',
                                               dict, {})