def __init__(self, config: dict, sequence: bool): """ Parse a feature from config file Args: config: Parsed JSON for this feature sequence: True if is a item sequence feature. False if is a transaction feature """ # Feature name self.name:str = read_setting(config, 'name', str, Exception("'name' property expected")) # If = 0, feature will not be embedded. If > 0, the embedding dimension self.embedding_dim:int = read_setting(config, 'embedding_dim', int, 0) # Max. number of labels to use as input / predict. All, if == 0. self.max_labels = read_setting(config, 'max_labels', int, 0) # Belongs to items sequence? self.sequence = sequence # Label values self.labels: Labels = None # Embedding layer for this feature self.embedding_layer: tf.keras.layers.Embedding = None
def create_rnn(encoded_inputs):
    """ Builds the recurrent (GRU) branch of the model.

        Args:
            encoded_inputs: Encoded sequence inputs tensor to feed into the RNN
        Returns:
            Output tensor of the last RNN layer (after optional dropout)
    """
    # Model settings
    layer_size = read_setting(settings.model_config, 'rnn_layer_size', int, 128)
    dropout_ratio = read_setting(settings.model_config, 'rnn_dropout_ratio', float, 0.2)
    bidirectional = read_setting(settings.model_config, 'rnn_bidirectional', bool, True)
    rnn_n_layers = read_setting(settings.model_config, 'rnn_n_layers', int, 1)

    # Define layers
    x = encoded_inputs
    for i in range(rnn_n_layers):
        # Every layer except the last must return the full sequence, so it can feed
        # the next stacked RNN layer
        return_sequences = (i < (rnn_n_layers - 1))
        rnn_layer = tf.keras.layers.GRU(layer_size, name="rnn_" + str(i), return_sequences=return_sequences)
        if bidirectional:
            # BUGFIX: the wrapper name must be unique per layer. The previous constant
            # name "rnn_bidir" raised a duplicate-layer-name error when rnn_n_layers > 1
            rnn_layer = tf.keras.layers.Bidirectional(rnn_layer, name="rnn_bidir_" + str(i))
        x = rnn_layer(x)
        if dropout_ratio > 0:
            x = tf.keras.layers.Dropout(dropout_ratio, name="dropout_" + str(i))(x)
    return x
def create_conv(encoded_inputs):
    """ Builds the convolutional branch of the model.

        Args:
            encoded_inputs: Encoded sequence inputs tensor to convolve
        Returns:
            Flattened output tensor of the convolution stack
    """
    # Read convolution hyperparameters from the model configuration
    n_layers = read_setting(settings.model_config, 'conv_n_layers', int, 2)
    layer_size = read_setting(settings.model_config, 'conv_layer_size', int, 128)
    kernel_size = read_setting(settings.model_config, 'conv_kernel_size', int, 4)
    strides = read_setting(settings.model_config, 'conv_strides', int, 1)
    pool_size = read_setting(settings.model_config, 'conv_pool_size', int, 0)

    # Stack the convolution layers, with optional max-pooling after each one
    x = encoded_inputs
    for layer_idx in range(n_layers):
        conv = tf.keras.layers.Conv1D(layer_size, kernel_size, strides=strides,
                                      activation='relu', name="conv_" + str(layer_idx))
        x = conv(x)
        if pool_size > 0:
            x = tf.keras.layers.MaxPooling1D(pool_size)(x)

    # Flatten the convolution outputs into a single feature vector
    return tf.keras.layers.Flatten()(x)
def create_dense(encoded_inputs):
    """ Builds the feed-forward (dense) branch of the model.

        Args:
            encoded_inputs: Encoded inputs tensor to feed into the DNN
        Returns:
            Output tensor of the last dense layer
    """
    # Read dense-stack hyperparameters from the model configuration
    n_layers = read_setting(settings.model_config, 'dense_n_layers', int, 2)
    layer_size = read_setting(settings.model_config, 'dense_layer_size', int, 128)
    activation = read_setting(settings.model_config, 'dense_activation', str, 'relu')

    # Chain the dense layers one after another
    x = encoded_inputs
    for layer_idx in range(n_layers):
        dense = tf.keras.layers.Dense(layer_size, name="dnn_" + str(layer_idx), activation=activation)
        x = dense(x)
    return x
def create_ensemble_model(inputs: ModelInputs, rating_model: bool) -> tf.keras.Model:
    """ Creates a model combining an RNN branch and a convolutional branch.

        Args:
            inputs: Model inputs definition
            rating_model: True to build the rating model output, False for the
                candidates model output (passed through to create_output_layer)
        Returns:
            The assembled Keras model
    """
    # Encoded sequence inputs, shared by both branches
    encoded_inputs = inputs.get_all_as_sequence()

    # Branch 1: recurrent network over the sequence
    rnn_x = create_rnn(encoded_inputs)
    # Branch 2: convolution over the sequence
    conv_x = create_conv(encoded_inputs)

    # NOTE: a third, dense branch over a multi-hot items encoding was tried and disabled:
    # items_as_multihot = dense_model.items_as_multihot(inputs)
    # dense_x = dense_model.create_dense(items_as_multihot)
    # x = tf.keras.layers.Concatenate()( [rnn_x , conv_x, dense_x] )

    # Merge the branch outputs
    x = tf.keras.layers.Concatenate()([rnn_x, conv_x])

    # Optional "ensemble" dense layer on top of the merged outputs (skipped if size == 0)
    ensemble_layer_size = read_setting(settings.model_config, 'ensemble_layer_size', int, 512)
    if ensemble_layer_size > 0:
        x = tf.keras.layers.Dense(ensemble_layer_size, activation='relu')(x)

    # Output layer
    x = dense_model.create_output_layer(inputs, x, rating_model)
    return tf.keras.Model(inputs=inputs.inputs, outputs=x)
def __init__(self):
    """ Loads the application settings.

        The configuration file path is resolved, in order of precedence, from the
        command line, the MARKETBASKET_CONFIG_FILE_PATH environment variable, or
        the default 'data/config.json'. If no file is found, built-in defaults apply.
    """

    # Read config JSON file, if it was specified
    cmd_line_options = self._parse_cmd_line()

    # Get configuration file location
    # BUGFIX: compare against None with "is"/"is not", not ==/!= (PEP 8)
    if cmd_line_options.configfile is None and 'MARKETBASKET_CONFIG_FILE_PATH' in os.environ:
        # Not specified in command line. Get from environment variable
        cmd_line_options.configfile = os.environ['MARKETBASKET_CONFIG_FILE_PATH']
    if cmd_line_options.configfile is None and os.path.exists('data/config.json'):
        # Use this as default
        cmd_line_options.configfile = 'data/config.json'

    # Load config file
    if cmd_line_options.configfile is not None:
        settings_json = self._load_config_file(cmd_line_options.configfile)
    else:
        settings_json = {}

    # Setup configuration

    # Configuration file source
    self.config_file_path = cmd_line_options.configfile
    # Max number of items to handle
    self.n_max_items = read_setting(settings_json, 'n_max_items', int, 100)
    # Max number customers to handle. If zero, customer code will not be trained
    self.n_max_customers = read_setting(settings_json, 'n_max_customers', int, 100)
    # Ratio (1 = 100%) of samples to use for evaluation
    self.evaluation_ratio = read_setting(settings_json, 'evaluation_ratio', float, 0.15)
    # Batch size
    self.batch_size = read_setting(settings_json, 'batch_size', int, 64)
    # Epochs to train
    self.n_epochs = read_setting(settings_json, 'n_epochs', int, 15)
    # Use class weights to correct labels imbalance?
    self.class_weight = read_setting(settings_json, 'class_weight', bool, False)
    # Model type
    self.model_type = read_setting(settings_json, 'model_type', ModelType, "convolutional")
    # Sequence length
    self.sequence_length = read_setting(settings_json, 'sequence_length', int, 16)
    # Sequence - Items embedding dimension
    # TODO: Remove this
    self.items_embedding_dim = read_setting(settings_json, 'items_embedding_dim', int, 128)
    # Sequence - Customers embedding dimension
    # TODO: Remove this
    self.customers_embedding_dim = read_setting(settings_json, 'customers_embedding_dim', int, 64)
    # Transactions file path
    self.transactions_file = read_setting(settings_json, 'transactions_file', str, 'data/transactions.csv')
    # Candidates model generation directory
    self.model_dir = read_setting(settings_json, 'candidates_model_dir', str, 'models/candidates_model')
    # Rating model generation directory
    self.rating_model_dir = read_setting(settings_json, 'rating_model_dir', str, 'models/rating_model')
    # Number of candidates to rate for rating model
    self.n_candidates = read_setting(settings_json, 'n_candidates', int, 32)
    # BUGFIX: removed a duplicate, redundant read of 'customers_embedding_dim'
    # (it was assigned twice with identical arguments)

    # Train verbose log level
    self.train_log_level = cmd_line_options.trainlog

    # TODO: Is not working...
    # Log level for TF core (C++). This MUST be executed before importing tf
    # See https://stackoverflow.com/questions/35869137/avoid-tensorflow-print-on-standard-error
    # See https://github.com/tensorflow/tensorflow/issues/31870
    self.tf_log_level = read_setting(settings_json, 'tf_log_level', str, 'WARNING')
    if self.tf_log_level == 'WARNING':
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
    elif self.tf_log_level == 'ERROR':
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # Read features configuration ('features' is mandatory: KeyError if missing)
    self.features = features_set.FeaturesSet(settings_json['features'])

    # Model configuration
    self.model_config: Dict = read_setting(settings_json, 'model_config', dict, {})