def build_model(self):
    """Assemble the two-stream critic (``val`` + ``adv``) as a Keras model.

    The flattened state runs through the first ``self.act_insert_block``
    dense blocks. The network then forks: one stream continues from the
    state features alone, the other from the state features concatenated
    with the action input. Both streams pass through the remaining dense
    blocks, end in a single-unit layer, and are summed.

    Returns:
        keras.models.Model with inputs ``[state_input, action_input]`` and
        the summed single-unit heads as output.
    """
    state_in = keras.layers.Input(shape=self.state_shape, name='state_input')
    action_in = keras.layers.Input(shape=(self.action_size, ), name='action_input')
    flat_size = self.get_input_size(self.state_shape)
    features = Reshape((flat_size, ))(state_in)

    def hidden_block(tensor, idx):
        # One dense block configured from the idx-th hidden size/activation.
        return dense_block(tensor, self.hiddens[idx], self.activations[idx],
                           self.layer_norm, self.noisy_layer)

    def scalar_head(tensor):
        # Single-unit output layer with small uniform kernel and bias init.
        head = Dense(1, self.out_activation,
                     kernel_initializer=RandomUniform(-3e-3, 3e-3),
                     bias_initializer=RandomUniform(-3e-3, 3e-3))
        return head(tensor)

    with tf.variable_scope(self.scope):
        # Shared trunk: state only.
        for idx in range(self.act_insert_block):
            features = hidden_block(features, idx)

        # Fork: the second stream additionally sees the action.
        value_stream = features
        advantage_stream = Concatenate(axis=1)([features, action_in])
        for idx in range(self.act_insert_block, len(self.hiddens)):
            value_stream = hidden_block(value_stream, idx)
            advantage_stream = hidden_block(advantage_stream, idx)

        value_stream = scalar_head(value_stream)
        advantage_stream = scalar_head(advantage_stream)
        combined = Add()([value_stream, advantage_stream])
        model = keras.models.Model(inputs=[state_in, action_in], outputs=combined)
    return model
def build(self, input_shape):
    """Create the mean (``*_mu``) and sigma (``*_sigma``) weights of the layer.

    The initialisation ranges depend on ``self.factorised`` and on the size
    of the last input dimension.

    Args:
        input_shape: Shape of the layer input; must have at least two
            entries. The last entry is the input feature size and may be a
            TF1 ``Dimension`` (exposing ``.value``) or a plain ``int``.

    Raises:
        ValueError: If the last dimension of ``input_shape`` is undefined.
    """
    assert len(input_shape) >= 2
    self.input_dim = input_shape[-1]

    # A TF1 Dimension carries the size in ``.value``; newer shape objects and
    # plain tuples hand over an int directly. Accept both.
    fan_in = getattr(self.input_dim, 'value', self.input_dim)
    if fan_in is None:
        raise ValueError('The last dimension of the inputs to the noisy '
                         'layer must be defined. Found `None`.')

    if self.factorised:
        init_mu = 1. / np.sqrt(fan_in)
        init_sig = 0.5 / np.sqrt(fan_in)
    else:
        init_mu = np.sqrt(3. / fan_in)
        init_sig = 0.017

    self.kernel_mu = self.add_weight(
        shape=(self.input_dim, self.units),
        initializer=RandomUniform(-init_mu, init_mu),
        name='kernel_mu')
    self.kernel_sigma = self.add_weight(
        shape=(self.input_dim, self.units),
        initializer=RandomUniform(-init_sig, init_sig),
        name='kernel_sigma')

    if self.use_bias:
        self.bias_mu = self.add_weight(
            shape=(self.units, ),
            initializer=RandomUniform(-init_mu, init_mu),
            name='bias_mu')
        self.bias_sigma = self.add_weight(
            shape=(self.units, ),
            initializer=RandomUniform(-init_sig, init_sig),
            name='bias_sigma')

    self.input_spec = InputSpec(min_ndim=2, axes={-1: self.input_dim})
    self.built = True
def build_model(self):
    """Build the recurrent network: dense embedding, LSTM stack, dense output.

    Layout, in order:
      1. dense blocks from ``embedding_layers`` / ``embedding_activations``;
      2. all but the last LSTM layer with ``return_sequences=True``, then
         the last LSTM layer with the Keras default for ``return_sequences``;
      3. dense blocks from ``output_layers`` / ``output_layers_activations``;
      4. a final ``Dense(action_size)`` with small uniform initialisation.

    Returns:
        keras.models.Model mapping ``state_input`` to the final dense output.
    """
    with tf.variable_scope(self.scope):
        state_in = keras.layers.Input(shape=self.state_shape, name="state_input")
        net = state_in

        embedding_specs = zip(self.embedding_layers, self.embedding_activations)
        for width, act in embedding_specs:
            net = dense_block(net, width, act, self.layer_norm, self.noisy_layer)

        inner_lstm_specs = zip(self.lstm_layers[:-1], self.lstm_activations[:-1])
        for width, act in inner_lstm_specs:
            recurrent = LSTM(width, activation=act, return_sequences=True)
            net = recurrent(net)

        last_lstm = LSTM(units=self.lstm_layers[-1],
                         activation=self.lstm_activations[-1])
        net = last_lstm(net)

        output_specs = zip(self.output_layers, self.output_layers_activations)
        for width, act in output_specs:
            net = dense_block(net, width, act, self.layer_norm, self.noisy_layer)

        action_head = Dense(self.action_size, self.output_activation,
                            kernel_initializer=RandomUniform(-3e-3, 3e-3),
                            bias_initializer=RandomUniform(-3e-3, 3e-3))
        net = action_head(net)
        model = keras.models.Model(inputs=[state_in], outputs=net)
    return model
def build_model(self):
    """Build a feed-forward network from the state to ``action_size`` outputs.

    The state is flattened, passed through one dense block per entry of
    ``self.hiddens`` and finished with a ``Dense(action_size)`` layer whose
    kernel and bias start in Uniform[-3e-3, 3e-3].

    Returns:
        keras.models.Model mapping ``state_input`` to the output layer.
    """
    state_in = keras.layers.Input(shape=self.state_shape, name="state_input")
    flat_size = self.get_input_size(self.state_shape)
    net = Reshape((flat_size, ))(state_in)

    with tf.variable_scope(self.scope):
        for idx, width in enumerate(self.hiddens):
            net = dense_block(net, width, self.activations[idx],
                              self.layer_norm, self.noisy_layer)

        output_layer = Dense(self.action_size, self.out_activation,
                             kernel_initializer=RandomUniform(-3e-3, 3e-3),
                             bias_initializer=RandomUniform(-3e-3, 3e-3))
        net = output_layer(net)
        model = keras.models.Model(inputs=[state_in], outputs=net)
    return model
def build_model(self):
    """Build a state-action network with ``num_atoms`` outputs.

    The flattened state goes through the dense blocks listed in
    ``self.hiddens``. Immediately before block number
    ``self.act_insert_block`` the action input is concatenated onto the
    features. The output layer has ``self.num_atoms`` units.

    Returns:
        keras.models.Model with inputs ``[state_input, action_input]``.
    """
    state_in = keras.layers.Input(shape=self.state_shape, name='state_input')
    action_in = keras.layers.Input(shape=(self.action_size, ), name='action_input')
    flat_size = self.get_input_size(self.state_shape)
    net = Reshape((flat_size, ))(state_in)

    with tf.variable_scope(self.scope):
        for idx, width in enumerate(self.hiddens):
            if idx == self.act_insert_block:
                # Inject the action right before this block.
                net = Concatenate(axis=1)([net, action_in])
            net = dense_block(net, width, self.activations[idx],
                              self.layer_norm, self.noisy_layer)

        atoms_layer = Dense(self.num_atoms, self.out_activation,
                            kernel_initializer=RandomUniform(-3e-3, 3e-3),
                            bias_initializer=RandomUniform(-3e-3, 3e-3))
        atoms = atoms_layer(net)
        model = keras.models.Model(inputs=[state_in, action_in], outputs=atoms)
    return model
def create_critic_network(self):
    """Build a recurrent critic over (state, next state) sequences.

    Both inputs are embedded per time step (Dense(400) -> batch norm ->
    ReLU), projected to 300 units each, summed, passed through ReLU and an
    LSTM, and mapped to one value per time step through Dense(50) and
    Dense(1).

    Returns:
        Tuple ``(batch_state_x, batch_next_state_x, sequence_length,
        batch_y)``: the two Keras inputs, the sequence-length placeholder
        and the per-step output tensor.
    """
    sequence_length = tf.placeholder(tf.int32, shape=[None])
    batch_state_x = Input(batch_shape=[None, None, self.s_dim])
    batch_next_state_x = Input(batch_shape=[None, None, self.s_dim])

    def per_step(layer, tensor):
        # Apply `layer` independently at every time step.
        return TimeDistributed(layer)(tensor)

    def embed(tensor):
        # Dense(400) -> batch norm -> ReLU, each applied per time step.
        tensor = per_step(Dense(400), tensor)
        tensor = per_step(BatchNormalization(), tensor)
        return per_step(Activation('relu'), tensor)

    state_features = embed(batch_state_x)
    next_state_features = embed(batch_next_state_x)

    # Merge the branches: separate 300-unit projections, summed.
    state_proj = per_step(Dense(300), state_features)
    next_state_proj = per_step(Dense(300), next_state_features)
    merged = Add()([state_proj, next_state_proj])
    merged = per_step(Activation('relu'), merged)

    # Recurrent core.
    cell = tf.nn.rnn_cell.LSTMCell(num_units=self.lstm_num_cells,
                                   state_is_tuple=True)
    lstm_outputs, _ = tf.nn.dynamic_rnn(cell,
                                        merged,
                                        sequence_length=sequence_length,
                                        time_major=False,
                                        dtype=tf.float32)

    # Output layers, kernels initialised in Uniform[-3e-3, 3e-3].
    w_init = RandomUniform(minval=-0.003, maxval=0.003)
    hidden = per_step(
        Dense(50, kernel_initializer=RandomUniform(minval=-0.003,
                                                   maxval=0.003)),
        lstm_outputs)
    batch_y = per_step(Dense(1, kernel_initializer=w_init), hidden)
    return batch_state_x, batch_next_state_x, sequence_length, batch_y
def build_model(self):
    """Build a network with three linear heads over a shared dense trunk.

    Heads (all without activation, initialised in Uniform[-3e-3, 3e-3]):
      * ``log_weight``: ``K`` units;
      * ``mu``: ``K * action_size`` units reshaped to ``(K, action_size)``;
      * ``log_sig``: ``K * action_size`` units reshaped the same way.

    NOTE(review): the names suggest parameters of a K-component mixture
    over actions; confirm against the code consuming these outputs.

    Returns:
        keras.models.Model mapping ``state_input`` to
        ``[log_weight, mu, log_sig]``.
    """
    state_in = keras.layers.Input(shape=self.state_shape, name='state_input')
    flat_size = self.get_input_size(self.state_shape)
    trunk = Reshape((flat_size, ))(state_in)

    def linear_head(units):
        # Activation-free Dense layer with small uniform kernel/bias init.
        return Dense(units, None,
                     kernel_initializer=RandomUniform(-3e-3, 3e-3),
                     bias_initializer=RandomUniform(-3e-3, 3e-3))

    with tf.variable_scope(self.scope):
        for idx, width in enumerate(self.hiddens):
            trunk = dense_block(trunk, width, self.activations[idx],
                                self.layer_norm, self.noisy_layer)

        per_component_shape = (self.K, self.action_size)

        log_weight = linear_head(self.K)(trunk)

        mu = linear_head(self.K * self.action_size)(trunk)
        mu = Reshape(per_component_shape)(mu)

        log_sig = linear_head(self.K * self.action_size)(trunk)
        log_sig = Reshape(per_component_shape)(log_sig)

        model = keras.models.Model(inputs=[state_in],
                                   outputs=[log_weight, mu, log_sig])
    return model
def create_critic_network(self):
    """Build a recurrent critic over (state, action) sequences.

    The state is embedded per time step (Dense(400) -> batch norm -> ReLU).
    The action enters at the second hidden layer: state features and raw
    actions each get their own Dense(300), and the two results are summed
    before the ReLU. An LSTM follows, then a linear Dense(1) per time step.

    Returns:
        Tuple ``(inputs, action, sequence_length, out)``: the two Keras
        inputs, the sequence-length placeholder and the per-step output.
    """
    sequence_length = tf.placeholder(tf.int32, shape=[None])
    inputs = Input(batch_shape=[None, None, self.s_dim])
    action = Input(batch_shape=[None, None, self.a_dim])

    def per_step(layer, tensor):
        # Apply `layer` independently at every time step.
        return TimeDistributed(layer)(tensor)

    # First hidden layer: state only.
    hidden = per_step(Dense(400), inputs)
    hidden = per_step(BatchNormalization(), hidden)
    hidden = per_step(Activation('relu'), hidden)

    # Second hidden layer: separate projections for state features and
    # action, merged by element-wise sum.
    state_proj = per_step(Dense(300), hidden)
    action_proj = per_step(Dense(300), action)
    hidden = Add()([state_proj, action_proj])
    hidden = per_step(Activation('relu'), hidden)

    cell = tf.nn.rnn_cell.LSTMCell(num_units=self.lstm_num_cells,
                                   state_is_tuple=True)
    lstm_outputs, _ = tf.nn.dynamic_rnn(cell,
                                        hidden,
                                        sequence_length=sequence_length,
                                        time_major=False,
                                        dtype=tf.float32)

    # Linear output layer, one value per time step; kernel initialised in
    # Uniform[-3e-3, 3e-3].
    final_init = RandomUniform(minval=-0.003, maxval=0.003, seed=None)
    out = per_step(Dense(1, kernel_initializer=final_init), lstm_outputs)
    return inputs, action, sequence_length, out
def create_actor_model(self, state_input, root_net, action_dim, lr=0.001, dropout=0.3):
    """Build the actor head on top of an existing root network.

    Layout: Dense(40, relu) -> [batch norm] -> Flatten ->
    Dense(2 * action_dim, sigmoid) -> [batch norm] ->
    Dense(action_dim, softmax). The batch-norm layers are only added when
    ``self.use_batch_norm`` is set.

    Args:
        state_input: Keras input tensor(s) used as the model's inputs.
        root_net: Tensor the head is attached to.
        action_dim: Number of units of the softmax output layer.
        lr: Not used by this method; kept for backward compatibility.
        dropout: Not used by this method (no Dropout layer is added); kept
            for backward compatibility.

    Returns:
        Keras ``Model`` mapping ``state_input`` to the ``actor_out`` layer.
    """
    net = Dense(40, activation="relu")(root_net)
    if self.use_batch_norm:
        net = BatchNormalization()(net)

    net = Flatten()(net)
    net = Dense(action_dim * 2, activation="sigmoid")(net)
    if self.use_batch_norm:
        net = BatchNormalization()(net)

    # Every kernel weight of the output layer starts at 1 / action_dim.
    # TODO: add the previous action as an additional input.
    actor_out = Dense(action_dim,
                      activation="softmax",
                      kernel_initializer=Constant(1.0 / action_dim),
                      name="actor_out")(net)

    actor_model = Model(inputs=state_input, outputs=actor_out)
    if DEBUG:
        # summary() prints the table itself and returns None, so it must not
        # be passed to print() (that would emit a trailing "None").
        print("ACTOR MODEL :")
        actor_model.summary()
    return actor_model
def fully_connected_neural_net(input_shape, classes, is_five_layers, activation, is_normalized):
    """Create a 4- or 5-layer fully connected softmax classifier.

    Every hidden layer has 1000 units. All Dense layers, including the
    softmax output, share one kernel initializer instance.

    Args:
        input_shape: int, number of features per example.
        classes: int, number of output classes.
        is_five_layers: True for 5 Dense layers in total, False for 4.
        activation: Activation of the hidden layers, e.g. 'tanh',
            'sigmoid' or 'softsign'.
        is_normalized: True for Glorot uniform initialization, False for
            Uniform[-sqrt(0.001), sqrt(0.001)].

    Returns:
        Keras ``Model`` built from the given settings.
    """
    if is_normalized:
        kernel_init = glorot_uniform()
    else:
        bound = np.sqrt(0.001)
        kernel_init = RandomUniform(minval=-bound, maxval=bound)

    total_layers = 5 if is_five_layers else 4

    x_input = Input(shape=(input_shape, ))
    hidden = x_input
    # The softmax layer accounts for one of the `total_layers` Dense layers;
    # the rest are identical hidden layers.
    for _ in range(total_layers - 1):
        hidden = Dense(units=1000,
                       activation=activation,
                       kernel_initializer=kernel_init)(hidden)

    x_output = Dense(units=classes,
                     activation='softmax',
                     kernel_initializer=kernel_init)(hidden)
    return Model(inputs=x_input, outputs=x_output)
def create_actor_network(self):
    """Build a recurrent actor over variable-length state sequences.

    Two per-step hidden layers (Dense(400) and Dense(300), each followed by
    batch norm and ReLU) feed an LSTM whose initial state comes from a
    placeholder. A per-step tanh layer produces the action, which is then
    multiplied by ``self.action_bound``.

    Returns:
        Tuple ``(inputs, sequence_length, init_state, state, out,
        scaled_out)``: the Keras input, the sequence-length placeholder,
        the initial-LSTM-state placeholder of shape
        ``[2, None, lstm_num_cells]``, the final LSTM state, the tanh
        output and the scaled output.
    """
    sequence_length = tf.placeholder(tf.int32, shape=[None])
    inputs = Input(batch_shape=[None, None, self.s_dim])

    hidden = inputs
    for width in (400, 300):
        hidden = TimeDistributed(Dense(width))(hidden)
        hidden = TimeDistributed(BatchNormalization())(hidden)
        hidden = TimeDistributed(Activation('relu'))(hidden)

    # Index 0 / 1 of the placeholder become the two LSTM state tensors.
    init_state = tf.placeholder(dtype=tf.float32,
                                shape=[2, None, self.lstm_num_cells])
    cell = tf.nn.rnn_cell.LSTMCell(num_units=self.lstm_num_cells,
                                   state_is_tuple=True)
    start_state = tf.nn.rnn_cell.LSTMStateTuple(init_state[0], init_state[1])
    lstm_outputs, final_state = tf.nn.dynamic_rnn(
        cell,
        hidden,
        initial_state=start_state,
        sequence_length=sequence_length,
        time_major=False,
        dtype=tf.float32)

    # Final layer kernel is initialised in Uniform[-3e-3, 3e-3].
    final_init = RandomUniform(minval=-0.003, maxval=0.003, seed=None)
    action_layer = TimeDistributed(
        Dense(self.a_dim, activation='tanh', kernel_initializer=final_init))
    out = action_layer(lstm_outputs)

    # Scale the tanh output by the action bound.
    scaled_out = tf.multiply(out, self.action_bound)
    return inputs, sequence_length, init_state, final_state, out, scaled_out
def create_world_modeler_network(self):
    """Build a recurrent model mapping (state, action) sequences to states.

    State and action are embedded per time step (Dense(400) -> batch norm
    -> ReLU), projected to 300 units each, summed, passed through ReLU and
    an LSTM. A per-step tanh layer of width ``s_dim`` produces the output,
    which is also returned multiplied by ``self.state_bound``.

    Returns:
        Tuple ``(batch_state_x, batch_action_x, sequence_length, batch_y,
        batch_y_scaled_out)``.
    """
    sequence_length = tf.placeholder(tf.int32, shape=[None])
    batch_state_x = Input(batch_shape=[None, None, self.s_dim])
    batch_action_x = Input(batch_shape=[None, None, self.a_dim])

    def embed(tensor):
        # Dense(400) -> batch norm -> ReLU, each applied per time step.
        tensor = TimeDistributed(Dense(400))(tensor)
        tensor = TimeDistributed(BatchNormalization())(tensor)
        return TimeDistributed(Activation('relu'))(tensor)

    state_features = embed(batch_state_x)
    action_features = embed(batch_action_x)

    # Merge the branches: separate 300-unit projections, summed.
    state_proj = TimeDistributed(Dense(300))(state_features)
    action_proj = TimeDistributed(Dense(300))(action_features)
    merged = Add()([state_proj, action_proj])
    merged = TimeDistributed(Activation('relu'))(merged)

    # Recurrent core.
    cell = tf.nn.rnn_cell.LSTMCell(num_units=self.lstm_num_cells,
                                   state_is_tuple=True)
    lstm_outputs, _ = tf.nn.dynamic_rnn(cell,
                                        merged,
                                        sequence_length=sequence_length,
                                        time_major=False,
                                        dtype=tf.float32)

    # Output layer, kernel initialised in Uniform[-3e-3, 3e-3].
    w_init = RandomUniform(minval=-0.003, maxval=0.003)
    output_layer = TimeDistributed(
        Dense(self.s_dim, activation='tanh', kernel_initializer=w_init))
    batch_y = output_layer(lstm_outputs)
    batch_y_scaled_out = tf.multiply(batch_y, self.state_bound)
    return batch_state_x, batch_action_x, sequence_length, batch_y, batch_y_scaled_out
def create_actor_network(self):
    """Build a recurrent actor over fixed-length state sequences.

    A per-step Dense(400, relu) feeds an LSTM; a linear Dense(a_dim) on the
    LSTM outputs yields the action, which is also returned multiplied by
    ``self.action_bound``.

    Returns:
        Tuple ``(batch_state_x, batch_action_y, batch_action_y_scaled_out)``:
        the Keras input of shape ``[None, episode_length, s_dim]``, the raw
        output and the scaled output.
    """
    ep_length = self.episode_length
    batch_state_x = Input(batch_shape=[None, ep_length, self.s_dim])

    # State branch.
    state_net = TimeDistributed(Dense(400, activation='relu'))(batch_state_x)

    # LSTM cell.
    rnn_cell = tf.nn.rnn_cell.LSTMCell(num_units=self.lstm_num_cells,
                                       state_is_tuple=True)
    lstm_outputs, _ = tf.nn.dynamic_rnn(rnn_cell, state_net, dtype=tf.float32)

    # Final dense layer. The small-uniform initializer used to be created
    # here but never handed to the layer, which therefore fell back to the
    # Keras default; pass it explicitly.
    w_init = RandomUniform(minval=-0.005, maxval=0.005)
    last_layer = Dense(self.a_dim, kernel_initializer=w_init)
    batch_action_y = last_layer(lstm_outputs)
    batch_action_y_scaled_out = tf.multiply(batch_action_y, self.action_bound)
    return batch_state_x, batch_action_y, batch_action_y_scaled_out
def create_actor_network(self):
    """Build a recurrent actor over variable-length state sequences.

    Two per-step hidden layers (Dense(400) -> batch norm -> ReLU, twice)
    feed an LSTM whose initial state comes from a placeholder. A per-step
    tanh layer produces the action, which is also returned multiplied by
    ``self.action_bound``.

    Returns:
        Tuple ``(batch_state_x, sequence_length, init_state, state,
        batch_action_y, batch_action_y_scaled_out)`` where ``init_state``
        is the ``[2, None, lstm_num_cells]`` placeholder and ``state`` the
        final LSTM state.
    """
    sequence_length = tf.placeholder(tf.int32, shape=[None])
    batch_state_x = Input(batch_shape=[None, None, self.s_dim])

    # State branch: two identical embedding stages.
    features = batch_state_x
    for _ in range(2):
        features = TimeDistributed(Dense(400))(features)
        features = TimeDistributed(BatchNormalization())(features)
        features = TimeDistributed(Activation('relu'))(features)

    # LSTM cell; index 0 / 1 of the placeholder become the two state tensors.
    init_state = tf.placeholder(dtype=tf.float32,
                                shape=[2, None, self.lstm_num_cells])
    cell = tf.nn.rnn_cell.LSTMCell(num_units=self.lstm_num_cells,
                                   state_is_tuple=True)
    start_state = tf.nn.rnn_cell.LSTMStateTuple(init_state[0], init_state[1])
    lstm_outputs, final_state = tf.nn.dynamic_rnn(
        cell,
        features,
        initial_state=start_state,
        sequence_length=sequence_length,
        time_major=False,
        dtype=tf.float32)

    # Final dense layer, kernel initialised in Uniform[-3e-3, 3e-3].
    final_init = RandomUniform(minval=-0.003, maxval=0.003)
    action_layer = TimeDistributed(
        Dense(self.a_dim, activation='tanh', kernel_initializer=final_init))
    batch_action_y = action_layer(lstm_outputs)
    batch_action_y_scaled_out = tf.multiply(batch_action_y, self.action_bound)
    return (batch_state_x, sequence_length, init_state, final_state,
            batch_action_y, batch_action_y_scaled_out)
def create_critic_network(self):
    """Build a recurrent critic over fixed-length (state, action) sequences.

    State and action are embedded (per-step Dense(400) -> batch norm ->
    ReLU), projected by one Dense(400) each, summed, passed through ReLU
    and an LSTM, and mapped to one value per time step.

    Returns:
        Tuple ``(batch_state_x, batch_action_x, batch_y)``: the two Keras
        inputs (second dimension fixed to ``self.episode_length``) and the
        per-step output tensor.
    """
    ep_length = self.episode_length
    batch_state_x = Input(batch_shape=[None, ep_length, self.s_dim])
    batch_action_x = Input(batch_shape=[None, ep_length, self.a_dim])

    # State branch.
    state_net = TimeDistributed(Dense(400))(batch_state_x)
    state_net = BatchNormalization()(state_net)
    state_net = Activation('relu')(state_net)

    # Action branch.
    action_net = TimeDistributed(Dense(400))(batch_action_x)
    action_net = BatchNormalization()(action_net)
    action_net = Activation('relu')(action_net)

    # Merge branches. Summing the two layer outputs computes
    #   state_net @ W1 + b1 + action_net @ W2 + b2
    # with the layers' own trainable variables. The previous version
    # rebuilt this expression from `get_weights()`, which returns NumPy
    # snapshots: the merge weights were baked into the graph as constants
    # and never trained.
    t1_layer_out = TimeDistributed(Dense(400))(state_net)
    t2_layer_out = TimeDistributed(Dense(400))(action_net)
    merged_net = Add()([t1_layer_out, t2_layer_out])
    merged_net = Activation('relu')(merged_net)

    # LSTM cell.
    rnn_cell = tf.nn.rnn_cell.LSTMCell(num_units=self.lstm_num_cells,
                                       state_is_tuple=True)
    lstm_outputs, _ = tf.nn.dynamic_rnn(rnn_cell, merged_net, dtype=tf.float32)

    # Final dense layer. The small-uniform initializer used to be created
    # but never handed to the layer; pass it explicitly.
    w_init = RandomUniform(minval=-0.003, maxval=0.003)
    last_layer = Dense(1, kernel_initializer=w_init)
    batch_y = last_layer(lstm_outputs)
    return batch_state_x, batch_action_x, batch_y
def create_world_modeler_network(self):
    """Build a recurrent model mapping fixed-length (state, action) sequences to states.

    State and action are embedded by a per-step Dense(400, relu) each,
    projected by one Dense(400) each, summed, passed through ReLU and an
    LSTM. A linear Dense(s_dim) on the LSTM outputs yields the output,
    which is also returned multiplied by ``self.state_bound``.

    Returns:
        Tuple ``(batchStateX, batchActionX, batchStateY,
        batchStateY_scaled_out)``: the two Keras inputs (second dimension
        fixed to ``self.episode_length``), the raw output and the scaled
        output.
    """
    ep_length = self.episode_length
    batch_state_x = Input(batch_shape=[None, ep_length, self.s_dim])
    batch_action_x = Input(batch_shape=[None, ep_length, self.a_dim])

    # State branch.
    state_net = TimeDistributed(Dense(400, activation='relu'))(batch_state_x)

    # Action branch.
    action_net = TimeDistributed(Dense(400, activation='relu'))(batch_action_x)

    # Merge branches. Summing the two layer outputs computes
    #   state_net @ W1 + b1 + action_net @ W2 + b2
    # with the layers' own trainable variables. The previous version
    # rebuilt this expression from `get_weights()`, which returns NumPy
    # snapshots: the merge weights were baked into the graph as constants
    # and never trained.
    t1_layer_out = TimeDistributed(Dense(400))(state_net)
    t2_layer_out = TimeDistributed(Dense(400))(action_net)
    merged_net = Add()([t1_layer_out, t2_layer_out])
    merged_net = Activation('relu')(merged_net)

    # LSTM cell.
    rnn_cell = tf.nn.rnn_cell.LSTMCell(num_units=self.lstm_num_cells,
                                       state_is_tuple=True)
    lstm_outputs, _ = tf.nn.dynamic_rnn(rnn_cell, merged_net, dtype=tf.float32)

    # Final dense layer. The small-uniform initializer used to be created
    # but never handed to the layer; pass it explicitly.
    w_init = RandomUniform(minval=-0.005, maxval=0.005)
    last_layer = Dense(self.s_dim, kernel_initializer=w_init)
    batch_state_y = last_layer(lstm_outputs)
    batch_state_y_scaled_out = tf.multiply(batch_state_y, self.state_bound)
    return batch_state_x, batch_action_x, batch_state_y, batch_state_y_scaled_out
# Keras may reduce this across the first axis (the batch) # but the semantics are unclear, so to be sure we use # the loss across the entire tensor, we reduce it to a # single scalar with the mean function. loss_mean = tf.reduce_mean(loss) return loss_mean # an lstm to a gru to a dense output model = Sequential() model.add(GRU(units=50, return_sequences=True, input_shape=(None, num_x_signals,))) # model.add(Dropout(0.2)) # model.add(LSTM(100, return_sequences=True)) # model.add(Dropout(0.2)) init = RandomUniform(minval=-0.1, maxval=0.1) model.add(Dense(num_y_signals, activation='linear', kernel_initializer=init)) # activation='sigmoid' optimizer = SGD(lr=1e-3) model.compile(loss=loss_mse_warmup, optimizer=optimizer) print(model.summary()) path_checkpoint = 'twitter_checkpoint.keras' callback_checkpoint = ModelCheckpoint(filepath=path_checkpoint, monitor='val_loss', verbose=1, save_weights_only=True, save_best_only=True)
def __init__(self, data, sequence_length=20, warmup_steps=50, dropout=0, layers=1, patience=10, units=512, display=False):
    """Instantiate the class: scale the data, then build, train and evaluate a GRU model.

    Args:
        data: Data provider object. This method calls its ``close()``,
            ``vectors_train()``, ``classes_train()``, ``vectors_test()``,
            ``classes_test()``, ``batch_size()``, ``epochs()`` and
            ``epoch_steps()`` methods.
        sequence_length: Sequence length handed to the batch generator.
        warmup_steps: Stored as ``self._warmup_steps``
            (presumably consumed by ``self._loss_mse_warmup``; confirm).
        dropout: ``recurrent_dropout`` of every GRU layer.
        layers: Number of GRU layers stacked on top of the first GRU layer
            (``int(abs(layers))`` is used).
        patience: Patience of the early-stopping callback.
        units: Number of units of every GRU layer.
        display: Stored as ``self.display``.

    Returns:
        None
    """
    # Initialize key variables
    self._warmup_steps = warmup_steps
    self._data = data
    self.display = display
    path_checkpoint = '/tmp/checkpoint.keras'
    _layers = int(abs(layers))

    # Delete any stale checkpoint file. Attempt the removal directly rather
    # than checking for existence first, which avoids a check-then-act race.
    try:
        os.remove(path_checkpoint)
    except FileNotFoundError:
        pass

    ###################################
    # TensorFlow session configuration
    config = tf.ConfigProto()

    # Don't pre-allocate memory; allocate as-needed
    config.gpu_options.allow_growth = True

    # Allow at most 95% of the GPU memory to be allocated
    config.gpu_options.per_process_gpu_memory_fraction = 0.95

    # Crash with DeadlineExceeded instead of hanging forever when
    # queues get full/empty
    config.operation_timeout_in_ms = 60000

    # Create a session with the above options specified.
    backend.tensorflow_backend.set_session(tf.Session(config=config))
    ###################################

    # Get data
    self._y_current = self._data.close()

    # Create training arrays
    x_train = self._data.vectors_train()
    self._y_train = self._data.classes_train()

    # Create test arrays for VALIDATION and EVALUATION
    xv_test = self._data.vectors_test()
    self._yv_test = self._data.classes_test()

    (self.training_rows, self._training_vector_count) = x_train.shape
    (self.test_rows, _) = xv_test.shape
    (_, self._training_class_count) = self._y_train.shape

    # Print stuff
    print('\n> Numpy Data Type: {}'.format(type(x_train)))
    print("> Numpy Data Shape: {}".format(x_train.shape))
    print("> Numpy Data Row[0]: {}".format(x_train[0]))
    print("> Numpy Data Row[Last]: {}".format(x_train[-1]))
    print('> Numpy Targets Type: {}'.format(type(self._y_train)))
    print("> Numpy Targets Shape: {}".format(self._y_train.shape))

    print('> Number of Samples: {}'.format(self._y_current.shape[0]))
    print('> Number of Training Samples: {}'.format(x_train.shape[0]))
    print('> Number of Training Classes: {}'.format(
        self._training_class_count))
    print('> Number of Test Samples: {}'.format(self.test_rows))
    print("> Training Minimum Value:", np.min(x_train))
    print("> Training Maximum Value:", np.max(x_train))
    print('> Number X signals: {}'.format(self._training_vector_count))
    print('> Number Y signals: {}'.format(self._training_class_count))

    # Print epoch related data
    print('> Epochs:', self._data.epochs())
    print('> Batch Size:', self._data.batch_size())
    print('> Steps:', self._data.epoch_steps())

    # Display estimated memory footprint of training data.
    print("> Data size: {:.2f} Bytes".format(x_train.nbytes))

    # Scale the inputs: the scaler is fitted on the training data only and
    # then applied unchanged to the test data.
    self._x_scaler = MinMaxScaler()
    self._x_train_scaled = self._x_scaler.fit_transform(x_train)

    print('> Scaled Training Minimum Value: {}'.format(
        np.min(self._x_train_scaled)))
    print('> Scaled Training Maximum Value: {}'.format(
        np.max(self._x_train_scaled)))

    self._xv_test_scaled = self._x_scaler.transform(xv_test)

    # The targets may have a different value range than the inputs, so they
    # get their own scaler, again fitted on the training targets only.
    self._y_scaler = MinMaxScaler()
    self._y_train_scaled = self._y_scaler.fit_transform(self._y_train)
    yv_test_scaled = self._y_scaler.transform(self._yv_test)

    # Data Generator
    print('> Scaled Training Data Shape: {}'.format(
        self._x_train_scaled.shape))
    print('> Scaled Training Targets Shape: {}'.format(
        self._y_train_scaled.shape))

    # We then create the batch-generator.
    generator = self._batch_generator(self._data.batch_size(),
                                      sequence_length)

    # Validation Set
    # The whole scaled test set is used as a single sequence, i.e. a batch
    # of size one. It is monitored after every epoch so that only improved
    # weights are checkpointed.
    validation_data = (np.expand_dims(self._xv_test_scaled, axis=0),
                       np.expand_dims(yv_test_scaled, axis=0))

    # Create the Recurrent Neural Network
    self._model = Sequential()

    # First GRU layer. As the first layer of the model it declares the input
    # shape: sequences of arbitrary length (None) with one value per input
    # signal.
    self._model.add(
        GRU(units=units,
            return_sequences=True,
            recurrent_dropout=dropout,
            input_shape=(None, self._training_vector_count,)))

    # Additional stacked GRU layers.
    for _ in range(0, _layers):
        self._model.add(
            GRU(units=units,
                recurrent_dropout=dropout,
                return_sequences=True))

    # Output layer: maps the GRU outputs to one value per target signal.
    # The targets were scaled to [0, 1], hence the sigmoid. Note that this
    # limits predictions to the value range seen in the training targets.
    self._model.add(Dense(self._training_class_count, activation='sigmoid'))

    # Compile Model
    optimizer = RMSprop(lr=1e-3)
    self._model.compile(loss=self._loss_mse_warmup,
                        optimizer=optimizer,
                        metrics=['accuracy'])

    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that would emit a trailing "None").
    print('> Model Summary:\n')
    self._model.summary()

    # Callback Functions

    # Write a checkpoint whenever the validation loss improves.
    callback_checkpoint = ModelCheckpoint(filepath=path_checkpoint,
                                          monitor='val_loss',
                                          verbose=1,
                                          save_weights_only=True,
                                          save_best_only=True)

    # Stop training when the validation loss stops improving.
    callback_early_stopping = EarlyStopping(monitor='val_loss',
                                            patience=patience,
                                            verbose=1)

    # Write the TensorBoard log during training.
    callback_tensorboard = TensorBoard(log_dir='/tmp/23_logs/',
                                       histogram_freq=0,
                                       write_graph=False)

    # Multiply the learning rate by 0.1 as soon as the validation loss has
    # not improved since the last epoch (patience=0), but never go below
    # 1e-4.
    callback_reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                           factor=0.1,
                                           min_lr=1e-4,
                                           patience=0,
                                           verbose=1)

    callbacks = [
        callback_early_stopping,
        callback_checkpoint,
        callback_tensorboard,
        callback_reduce_lr
    ]

    # Train the Recurrent Neural Network
    # An "epoch" here is `epoch_steps()` generator batches.
    print('\n> Starting data training\n')
    self._history = self._model.fit_generator(
        generator=generator,
        epochs=self._data.epochs(),
        steps_per_epoch=self._data.epoch_steps(),
        validation_data=validation_data,
        callbacks=callbacks)

    # Load Checkpoint
    # With early stopping the final weights may be worse than the best ones
    # seen, so reload the last saved checkpoint if there is one.
    print('> Loading model weights')
    if os.path.exists(path_checkpoint):
        self._model.load_weights(path_checkpoint)

    # Performance on Test-Set
    # Evaluate on the same single-sequence batch used for validation.
    x_eval, y_eval = validation_data
    result = self._model.evaluate(x=x_eval, y=y_eval)
    # NOTE(review): because the model is compiled with a metric, `result`
    # is presumably a [loss, accuracy] list rather than a scalar; confirm.
    print('> Loss (test-set): {}'.format(result))
TrainSet[i].append(x5ar[i] * 0.01) TrainSet[i].append(x6ar[i] * 0.01) TrainSet[i].append(x7ar[i] * 0.01) TrainSet[i].append(x8ar[i] * 0.01) TrainSet[i].append(x9ar[i] * 0.01) TrainSet[i].append(x10ar[i] * 0.01) TrainSet[i].append(x11ar[i] * 0.01) TrainSet[i].append(x12ar[i] * 0.01) TrainSet[i].append(x13ar[i] * 0.01) S = np.array(TrainSet) model = Sequential() model.add( Dense(1, kernel_initializer=RandomUniform(minval=1, maxval=1), input_dim=13, activation='sigmoid'), ) sgd = optimizers.SGD(lr=0.01) model.compile(optimizer=sgd, loss='binary_crossentropy', metrics=['binary_accuracy']) # stochastic gradient decent algorithm will be used for optimization process # Loss function will be binary_crossentropy history = model.fit(S, yar[0:TrainLen], batch_size=640, epochs=2000, shuffle=False) # 2000 epochs
# [batch_size, sequence_length - warmup_steps, num_y_signals] # Calculate the MSE loss for each value in these tensors. # This outputs a 3-rank tensor of the same shape. loss = tf.losses.mean_squared_error(labels=y_true_slice, predictions=y_pred_slice) # Keras may reduce this across the first axis (the batch) # but the semantics are unclear, so to be sure we use # the loss across the entire tensor, we reduce it to a # single scalar with the mean function. loss_mean = tf.reduce_mean(loss) return loss_mean init = RandomUniform(minval=-0.05, maxval=0.05) fit = True #def batch size batch_size = 64 sequence_length = 100 #data setup result_2 =[] ##data setup nrow=10000 train = pd.read_csv("data/processed.csv", nrows=nrow) df = train #filter df target_var = ["answered_correctly"]
def __init__(self,
             batch_size=64,
             sequence_length=20,
             warmup_steps=50,
             epochs=20,
             display=False):
    """Prepare the data, then build, train and evaluate the GRU network.

    Args:
        batch_size: Number of sequences in each training batch
        sequence_length: Number of time-steps in each training sequence
        warmup_steps: Stored as self.warmup_steps (presumably read by
            self.loss_mse_warmup - confirm against that method)
        epochs: Number of epochs passed to the Keras training call
        display: Stored as self.display

    Returns:
        None

    """
    # Initialize key variables
    self.target_names = ['Temp', 'WindSpeed', 'Pressure']
    self.warmup_steps = warmup_steps
    self.epochs = epochs
    self.batch_size = batch_size
    self.display = display

    # Get data
    x_data, y_data = self.data()

    print('\n> Numpy Data Type: {}'.format(type(x_data)))
    print("> Numpy Data Shape: {}".format(x_data.shape))
    print("> Numpy Data Row[0]: {}".format(x_data[0]))
    print('> Numpy Targets Type: {}'.format(type(y_data)))
    print("> Numpy Targets Shape: {}".format(y_data.shape))

    # Number of observations (data-points / samples) in the data-set.
    num_data = len(x_data)

    # The first 90% of the rows form the training-set, the rest the
    # test-set. The split is positional, the rows are not shuffled.
    train_split = 0.9
    self.num_train = int(train_split * num_data)
    num_test = num_data - self.num_train

    print('> Number of Samples: {}'.format(num_data))
    print("> Number of Training Samples: {}".format(self.num_train))
    print("> Number of Test Samples: {}".format(num_test))
    print("> Batch Size: {}".format(batch_size))
    steps_per_epoch = int(self.num_train / batch_size)
    print("> Recommended Epoch Steps: {:.2f}".format(steps_per_epoch))

    # Create test and training data
    x_train = x_data[0:self.num_train]
    x_test = x_data[self.num_train:]
    self.y_train = y_data[0:self.num_train]
    self.y_test = y_data[self.num_train:]
    self.num_x_signals = x_data.shape[1]
    self.num_y_signals = y_data.shape[1]

    print("> Training Minimum Value:", np.min(x_train))
    print("> Training Maximum Value:", np.max(x_train))

    # Scale the inputs. The scaler is fitted on the training rows only and
    # then re-used, unchanged, on the test rows.
    x_scaler = MinMaxScaler()
    self.x_train_scaled = x_scaler.fit_transform(x_train)

    print('> Scaled Training Minimum Value: {}'.format(
        np.min(self.x_train_scaled)))
    print('> Scaled Training Maximum Value: {}'.format(
        np.max(self.x_train_scaled)))

    self.x_test_scaled = x_scaler.transform(x_test)

    # The targets get their own scaler because their value ranges may
    # differ from those of the input signals.
    self.y_scaler = MinMaxScaler()
    self.y_train_scaled = self.y_scaler.fit_transform(self.y_train)
    y_test_scaled = self.y_scaler.transform(self.y_test)

    # Data Generator
    print('> Scaled Training Data Shape: {}'.format(
        self.x_train_scaled.shape))
    print('> Scaled Training Targets Shape: {}'.format(
        self.y_train_scaled.shape))

    # We then create the batch-generator.
    generator = self.batch_generator(batch_size, sequence_length)

    # Validation Set
    # The whole test-set is fed as a single sequence, so a leading batch
    # axis of size one is added. The validation loss computed on it drives
    # the checkpoint, early-stopping and learning-rate callbacks below.
    validation_data = (np.expand_dims(self.x_test_scaled, axis=0),
                       np.expand_dims(y_test_scaled, axis=0))

    # Create the Recurrent Neural Network
    self.model = Sequential()

    # First layer: a GRU that emits 512 values per time-step. As the first
    # layer it must declare its input shape: sequences of arbitrary length
    # (None), each step carrying num_x_signals values.
    self.model.add(
        GRU(units=512,
            return_sequences=True,
            input_shape=(
                None,
                self.num_x_signals,
            )))

    # Dense output layer mapping the GRU output to num_y_signals values.
    # Sigmoid keeps the output in [0, 1], the range the target scaler was
    # fitted to produce on the training targets.
    self.model.add(Dense(self.num_y_signals, activation='sigmoid'))

    # Disabled alternative: with sigmoid the network cannot predict values
    # outside the range seen in training. A linear output lifts that limit
    # but may need a smaller weight initialisation to avoid NaN losses.
    if False:
        # Maybe use lower init-ranges.
        init = RandomUniform(minval=-0.05, maxval=0.05)

        self.model.add(
            Dense(self.num_y_signals,
                  activation='linear',
                  kernel_initializer=init))

    # Compile Model
    # RMSprop with a starting learning-rate of 1e-3.
    optimizer = RMSprop(lr=1e-3)
    self.model.compile(loss=self.loss_mse_warmup, optimizer=optimizer)

    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that would emit a stray "None" line).
    print('> Model Summary:\n')
    self.model.summary()

    # Callback Functions
    # Write a checkpoint (weights only) whenever val_loss improves.
    path_checkpoint = '/tmp/23_checkpoint.keras'
    callback_checkpoint = ModelCheckpoint(filepath=path_checkpoint,
                                          monitor='val_loss',
                                          verbose=1,
                                          save_weights_only=True,
                                          save_best_only=True)

    # Stop when val_loss has not improved for 5 consecutive epochs.
    callback_early_stopping = EarlyStopping(monitor='val_loss',
                                            patience=5,
                                            verbose=1)

    # Write the TensorBoard log during training.
    callback_tensorboard = TensorBoard(log_dir='/tmp/23_logs/',
                                       histogram_freq=0,
                                       write_graph=False)

    # Multiply the learning-rate by 0.1 as soon as val_loss fails to
    # improve (patience=0), but never go below 1e-4.
    callback_reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                           factor=0.1,
                                           min_lr=1e-4,
                                           patience=0,
                                           verbose=1)

    callbacks = [
        callback_early_stopping, callback_checkpoint, callback_tensorboard,
        callback_reduce_lr
    ]

    # Train the Recurrent Neural Network
    # An "epoch" here is steps_per_epoch generator batches, not one pass
    # over the training-set. If the loss becomes NaN, review the
    # architecture, learning-rate, batch-size and sequence-length.
    self.model.fit_generator(generator=generator,
                             epochs=self.epochs,
                             steps_per_epoch=steps_per_epoch,
                             validation_data=validation_data,
                             callbacks=callbacks)

    # Load Checkpoint
    # Early-stopping ends training some epochs after the best one, so the
    # best weights are reloaded from the checkpoint.
    try:
        self.model.load_weights(path_checkpoint)
    except Exception as error:
        print('\n> Error trying to load checkpoint.\n\n{}'.format(error))
        # Exit with a non-zero status so callers can detect the failure.
        sys.exit(1)

    # Performance on Test-Set
    # evaluate() expects a batch, so the single long test sequence again
    # gets a leading batch axis.
    result = self.model.evaluate(x=np.expand_dims(self.x_test_scaled,
                                                  axis=0),
                                 y=np.expand_dims(y_test_scaled, axis=0))

    print('> Loss (test-set): {}'.format(result))

    # If you have several metrics you can use this instead.
    if False:
        for res, metric in zip(result, self.model.metrics_names):
            print('{0}: {1:.3e}'.format(metric, res))
def model(self, params=None):
    """Create and train the Recurrent Neural Network.

    Args:
        params: Mapping of hyperparameters. The keys read here are
            'batch_size', 'units', 'dropout', 'layers', 'sequence_length',
            'patience' and 'epochs'. When None, self.hyperparameters is
            used instead.

    Returns:
        _model: Trained RNN model

    """
    # Initialize key variables
    if params is None:
        _hyperparameters = self.hyperparameters
    else:
        _hyperparameters = params

    # Calculate the steps per epoch
    epoch_steps = int(
        self.training_rows / _hyperparameters['batch_size']) + 1

    # Create the model object
    _model = Sequential()

    # First GRU layer. As the first layer it must declare its input shape:
    # sequences of arbitrary length (None), each step carrying
    # self._training_vector_count values.
    _model.add(
        GRU(units=_hyperparameters['units'],
            return_sequences=True,
            recurrent_dropout=_hyperparameters['dropout'],
            input_shape=(
                None,
                self._training_vector_count,
            )))

    # Remaining GRU layers, so that 'layers' is the total GRU count.
    for _ in range(1, _hyperparameters['layers']):
        _model.add(
            GRU(units=_hyperparameters['units'],
                recurrent_dropout=_hyperparameters['dropout'],
                return_sequences=True))

    # Dense output layer with one value per target signal. Sigmoid keeps
    # the output in [0, 1]; presumably the targets were scaled to that
    # range by the caller - confirm.
    _model.add(Dense(self._training_class_count, activation='sigmoid'))

    # Disabled alternative: with sigmoid the network cannot predict values
    # outside the range seen in training. A linear output lifts that limit
    # but may need a smaller weight initialisation to avoid NaN losses.
    if False:
        # Maybe use lower init-ranges.
        init = RandomUniform(minval=-0.05, maxval=0.05)

        _model.add(
            Dense(self._training_class_count,
                  activation='linear',
                  kernel_initializer=init))

    # Compile Model
    # RMSprop with a starting learning-rate of 1e-3.
    optimizer = RMSprop(lr=1e-3)
    _model.compile(loss=self._loss_mse_warmup,
                   optimizer=optimizer,
                   metrics=['accuracy'])

    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that would emit a stray "None" line).
    print('\n> Model Summary:\n')
    _model.summary()

    # Create the batch-generator.
    generator = self._batch_generator(_hyperparameters['batch_size'],
                                      _hyperparameters['sequence_length'])

    # Validation Set
    # The whole validation set is fed as a single sequence, so a leading
    # batch axis of size one is added. The validation loss computed on it
    # drives the callbacks below.
    validation_data = (np.expand_dims(self._x_validation_scaled, axis=0),
                       np.expand_dims(self._y_validation_scaled, axis=0))

    # Callback Functions
    # Write a checkpoint (weights only) whenever val_loss improves.
    callback_checkpoint = ModelCheckpoint(filepath=self._path_checkpoint,
                                          monitor='val_loss',
                                          verbose=1,
                                          save_weights_only=True,
                                          save_best_only=True)

    # Stop when val_loss has not improved for 'patience' epochs.
    callback_early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=_hyperparameters['patience'],
        verbose=1)

    # Write the TensorBoard log during training.
    callback_tensorboard = TensorBoard(log_dir='/tmp/23_logs/',
                                       histogram_freq=0,
                                       write_graph=False)

    # Multiply the learning-rate by 0.1 as soon as val_loss fails to
    # improve (patience=0), but never go below 1e-4.
    callback_reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                           factor=0.1,
                                           min_lr=1e-4,
                                           patience=0,
                                           verbose=1)

    callbacks = [
        callback_early_stopping, callback_checkpoint, callback_tensorboard,
        callback_reduce_lr
    ]

    # Train the Recurrent Neural Network
    # An "epoch" here is epoch_steps generator batches, not one pass over
    # the training-set. If the loss becomes NaN, review the architecture,
    # learning-rate, batch-size and sequence-length.
    print('\n> Parameters for training\n')
    pprint(_hyperparameters)
    print('\n> Starting data training\n')

    _model.fit_generator(generator=generator,
                         epochs=_hyperparameters['epochs'],
                         steps_per_epoch=epoch_steps,
                         validation_data=validation_data,
                         callbacks=callbacks)

    # Return
    return _model
def model(self, params=None):
    """Build and train a single-layer GRU network on the scaled data.

    Args:
        params: Hyperparameter object exposing the attributes batch_size,
            sequence_length, units and dropout. When None,
            self._hyperparameters is used instead.

    Returns:
        None

    """
    normal = self._data.split()
    scaled = self._data.scaled_split()
    (training_rows, x_feature_count) = normal.x_train.shape
    (_, y_feature_count) = normal.y_train.shape

    # Allow overriding parameters
    if params is None:
        _hyperparameters = self._hyperparameters
    else:
        _hyperparameters = params

    # Scale the batch size by the GPU count in a local variable. Writing
    # it back onto the (shared) hyperparameters object would compound the
    # multiplier on every call.
    batch_size = int(_hyperparameters.batch_size * self._gpus)

    Generator = namedtuple('Generator', [
        'batch_size', 'sequence_length', 'x_feature_count',
        'y_feature_count', 'training_rows', 'y_train_scaled',
        'x_train_scaled'
    ])

    generator = _batch_generator(
        Generator(batch_size=batch_size,
                  sequence_length=_hyperparameters.sequence_length,
                  x_feature_count=x_feature_count,
                  y_feature_count=y_feature_count,
                  training_rows=training_rows,
                  y_train_scaled=scaled.y_train,
                  x_train_scaled=scaled.x_train))

    # NOTE(review): validation runs on the *training* split, so val_loss
    # (and every callback monitoring it) cannot detect over-fitting.
    # Presumably a held-out split was intended - confirm which attributes
    # the scaled split exposes before changing this.
    validation_data = (np.expand_dims(scaled.x_train, axis=0),
                       np.expand_dims(scaled.y_train, axis=0))

    # NOTE(review): '/:GPU:0' is an unusual device string - confirm it
    # selects the intended device.
    with tf.device('/:GPU:0'):
        model = Sequential()
        # The input is a batch of sequences of arbitrary length (None) with
        # x_feature_count values per step.
        model.add(
            GRU(_hyperparameters.units,
                return_sequences=True,
                recurrent_dropout=_hyperparameters.dropout,
                input_shape=(None, x_feature_count)))

        # One sigmoid output per target feature.
        model.add(Dense(y_feature_count, activation='sigmoid'))

        # Disabled alternative: a linear output layer with a narrow
        # uniform weight initialisation.
        if False:
            from tensorflow.python.keras.initializers import RandomUniform

            # Maybe use lower init-ranges.
            init = RandomUniform(minval=-0.05, maxval=0.05)

            model.add(
                Dense(y_feature_count,
                      activation='linear',
                      kernel_initializer=init))

        optimizer = RMSprop(lr=1e-3)
        model.compile(loss=self.loss_mse_warmup,
                      optimizer=optimizer,
                      metrics=['accuracy'])
        model.summary()

        # Write a checkpoint (weights only) whenever val_loss improves.
        path_checkpoint = '/tmp/hvass_checkpoint.keras'
        callback_checkpoint = ModelCheckpoint(filepath=path_checkpoint,
                                              monitor='val_loss',
                                              verbose=1,
                                              save_weights_only=True,
                                              save_best_only=True)

        # Stop when val_loss has not improved for 5 consecutive epochs.
        callback_early_stopping = EarlyStopping(monitor='val_loss',
                                                patience=5,
                                                verbose=1)

        callback_tensorboard = TensorBoard(log_dir='/tmp/hvass_logs/',
                                           histogram_freq=0,
                                           write_graph=False)

        # Multiply the learning-rate by 0.1 as soon as val_loss fails to
        # improve (patience=0), but never go below 1e-4.
        callback_reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                               factor=0.1,
                                               min_lr=1e-4,
                                               patience=0,
                                               verbose=1)

        callbacks = [
            callback_early_stopping, callback_checkpoint,
            callback_tensorboard, callback_reduce_lr
        ]

        model.fit(x=generator,
                  epochs=20,
                  steps_per_epoch=100,
                  validation_data=validation_data,
                  callbacks=callbacks)
def _model(self):
    """Create and compile the Recurrent Neural Network (no training).

    Args:
        None

    Returns:
        _model: Compiled RNN model

    """
    # Create the model object
    _model = Sequential()

    # First GRU layer. As the first layer it must declare its input shape:
    # sequences of arbitrary length (None), each step carrying
    # self._training_vector_count values.
    _model.add(
        GRU(units=self._units,
            return_sequences=True,
            recurrent_dropout=self._dropout,
            input_shape=(
                None,
                self._training_vector_count,
            )))

    # NOTE(review): this adds self._layers GRU layers on top of the first
    # one, i.e. self._layers + 1 in total. Confirm whether self._layers is
    # meant to be the total; changing it alters the architecture and makes
    # existing checkpoints unloadable, so it is left as is.
    for _ in range(0, self._layers):
        _model.add(
            GRU(units=self._units,
                recurrent_dropout=self._dropout,
                return_sequences=True))

    # Dense output layer with one value per target signal. Sigmoid keeps
    # the output in [0, 1]; presumably the targets were scaled to that
    # range by the caller - confirm.
    _model.add(Dense(self._training_class_count, activation='sigmoid'))

    # Disabled alternative: with sigmoid the network cannot predict values
    # outside the range seen in training. A linear output lifts that limit
    # but may need a smaller weight initialisation to avoid NaN losses.
    if False:
        # Maybe use lower init-ranges.
        init = RandomUniform(minval=-0.05, maxval=0.05)

        _model.add(
            Dense(self._training_class_count,
                  activation='linear',
                  kernel_initializer=init))

    # Compile Model
    # RMSprop with a starting learning-rate of 1e-3.
    optimizer = RMSprop(lr=1e-3)
    _model.compile(loss=self._loss_mse_warmup,
                   optimizer=optimizer,
                   metrics=['accuracy'])

    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that would emit a stray "None" line).
    print('> Model Summary:\n')
    _model.summary()

    # Return
    return _model
def forecast(checkpoint_path, data, target_date):
    """Predict the signals up to target_date and write predictions.csv.

    The CSV at ``data`` is loaded, its first two columns and its last
    column are dropped, and the remaining columns are min-max scaled with
    a scaler fitted on the first 90% of the rows. The GRU model is rebuilt,
    its weights are loaded from ``checkpoint_path`` and it is run over the
    leading rows of the last 10% of the data.

    Args:
        checkpoint_path: Path of the Keras weights checkpoint to load.
        data: Path (or anything pandas.read_csv accepts) of the input CSV.
        target_date: Passed to count_tot_seconds() to obtain the number of
            one-second steps to forecast.

    Returns:
        None. The rescaled predictions plus a timestamp column are written
        to "predictions.csv" in the current directory.

    Raises:
        ValueError: If the forecast horizon is not positive.

    """
    # Data preprocessing: keep columns 2 .. n-2.
    values = pd.read_csv(data).values
    values = values[:, 2:]
    values = values[:, :-1]

    # The scaler is fitted on the first 90% of the rows; the last 10% are
    # the inputs used for forecasting.
    train_split = 0.9
    num_train = int(train_split * len(values))
    x_train = values[0:num_train]
    x_test = values[num_train:]

    # The model predicts as many signals as it receives.
    num_x_signals = x_train.shape[1]
    num_y_signals = num_x_signals

    x_scaler = MinMaxScaler()
    x_scaler.fit(x_train)
    x_test_scaled = x_scaler.transform(x_test)

    # Rebuild the architecture so the checkpoint weights can be loaded.
    model = Sequential()
    model.add(
        GRU(units=512,
            return_sequences=True,
            input_shape=(
                None,
                num_x_signals,
            )))
    model.add(Dense(num_y_signals, activation='sigmoid'))

    # Disabled alternative output layer (linear, narrow uniform init).
    if False:
        from tensorflow.python.keras.initializers import RandomUniform
        init = RandomUniform(minval=-0.05, maxval=0.05)
        model.add(
            Dense(num_y_signals,
                  activation='linear',
                  kernel_initializer=init))

    # Load the weights; the freshly initialised weights are overwritten.
    # NOTE(review): on failure the error is only printed and the forecast
    # is produced from untrained weights - confirm that this best-effort
    # behaviour is wanted.
    try:
        model.load_weights(checkpoint_path)
    except Exception as error:
        print("Error trying to load checkpoint.")
        print(error)

    # PREDICT
    # Start index should be the current time step.
    start_idx = 0

    # Number of seconds between the current time and the target time.
    length = count_tot_seconds(target_date)
    if length <= 0:
        raise ValueError(
            'Forecast horizon must be positive, got {}'.format(length))

    # End-index for the sequences.
    end_idx = start_idx + length

    # Input-signals for the model, with a leading batch axis.
    x = np.expand_dims(x_test_scaled[start_idx:end_idx], axis=0)

    # Use the model to predict the output-signals.
    y_pred = model.predict(x)

    # The model output is between 0 and 1; map it back to the scale of the
    # original data-set.
    y_pred_rescaled = x_scaler.inverse_transform(y_pred[0])
    predictions = pd.DataFrame(y_pred_rescaled)

    # One timestamp per predicted row, one second apart. The count comes
    # from the rows actually predicted: when the test split is shorter than
    # the requested horizon the slice above is truncated, and sizing the
    # column by `length` would raise a length-mismatch error.
    first_second = datetime(2018, 12, 3, 0, 0, 0)
    predictions['timestamp'] = [
        str(first_second + timedelta(seconds=offset))
        for offset in range(1, len(predictions) + 1)
    ]

    np.savetxt("predictions.csv", predictions, delimiter=",", fmt='%s')

    return None
from keras.layers import SimpleRNN, Dense
from keras.layers import LSTM
from keras import optimizers
from tensorflow.python.keras.initializers import RandomUniform
from keras.optimizers import SGD
from keras.optimizers import Adam
from sklearn import preprocessing
from keras.layers import LSTM, Dense, Bidirectional, Input, Dropout, BatchNormalization, CuDNNGRU, CuDNNLSTM

# Two stacked SimpleRNN layers followed by a small dense head. The first
# RNN returns the full sequence so the second one can consume it.
model = Sequential()
model.add(SimpleRNN(128, input_shape=(FL, 1), return_sequences=True))
model.add(SimpleRNN(128, input_shape=(FL, 1)))
model.add(Dense(32, activation="sigmoid"))
# Both bounds of the uniform initialiser are 0, so the output kernel starts
# at exactly zero.
model.add(
    Dense(1,
          kernel_initializer=RandomUniform(minval=-0, maxval=0),
          activation="tanh"))
model.compile(loss='mse', optimizer=Adam(lr=0.002))

# Plot the untrained model's output against the target as a baseline.
plt.plot(Y_train, label="target")
plt.plot(model.predict(X_train), label="output")
print()
plt.legend()
plt.title("Before training")
plt.show()

# `epochs` is the Keras 2 name of the argument formerly called `nb_epoch`.
history = model.fit(X_train, Y_train, epochs=100, batch_size=500, verbose=1)
def run_model(data, df_targets):
    """Train the GRU model for every train/test split and score it.

    For each fraction in the module-level ``split`` iterable the data is
    split positionally, min-max scaled (scalers fitted on the training
    part only) and two independently initialised models are trained. Each
    model is scored on the test part after its best checkpoint is reloaded.

    Args:
        data: DataFrame of input signals, one row per time-step.
        df_targets: Single-column target values aligned with ``data``.

    Returns:
        numpy array with one entry per split: the mean score of the two
        models trained for that split. The score is the square root of the
        mean squared error (an RMSE, despite the variable names).

    """
    # The raw arrays do not depend on the split, so extract them once.
    x_data = data.values
    y_data = df_targets.values.reshape(-1, 1)
    num_data = len(x_data)
    num_x_signals = x_data.shape[1]
    num_y_signals = y_data.shape[1]

    final_mse = []

    for train_split in split:
        num_train = int(train_split * num_data)

        x_train = x_data[0:num_train]
        x_test = x_data[num_train:]
        y_train = y_data[0:num_train]
        y_test = y_data[num_train:]

        # Scalers are fitted on the training part only.
        x_scaler = MinMaxScaler()
        x_train_scaled = x_scaler.fit_transform(x_train)
        x_test_scaled = x_scaler.transform(x_test)

        y_scaler = MinMaxScaler()
        y_train_scaled = y_scaler.fit_transform(y_train)
        y_test_scaled = y_scaler.transform(y_test)

        batch_size = 256
        sequence_length = 100

        generator = batch_generator(batch_size, sequence_length,
                                    num_x_signals, num_y_signals, num_train,
                                    x_train_scaled, y_train_scaled)

        # The whole test part is one sequence with a leading batch axis.
        validation_data = (np.expand_dims(x_test_scaled, axis=0),
                           np.expand_dims(y_test_scaled, axis=0))

        model_mse = []

        # Train two models per split to average out initialisation noise.
        for _ in range(2):
            model = Sequential()
            model.add(
                GRU(units=512,
                    return_sequences=True,
                    input_shape=(
                        None,
                        num_x_signals,
                    )))
            model.add(Dense(num_y_signals, activation='sigmoid'))

            # Disabled alternative output layer (linear, narrow init).
            if False:
                from tensorflow.python.keras.initializers import RandomUniform

                # Maybe use lower init-ranges.
                init = RandomUniform(minval=-0.05, maxval=0.05)

                model.add(
                    Dense(num_y_signals,
                          activation='linear',
                          kernel_initializer=init))

            optimizer = RMSprop(lr=1e-3)
            model.compile(loss=loss_mse_warmup,
                          optimizer=optimizer,
                          metrics=['mse'])

            # Keep only the weights of the epoch with the best val_loss.
            path_checkpoint = 'best_model'
            callback_checkpoint = ModelCheckpoint(filepath=path_checkpoint,
                                                  monitor='val_loss',
                                                  verbose=1,
                                                  save_weights_only=True,
                                                  save_best_only=True)
            callback_early_stopping = EarlyStopping(monitor='val_loss',
                                                    patience=3,
                                                    verbose=1)
            callbacks = [callback_checkpoint, callback_early_stopping]

            model.fit(x=generator,
                      epochs=10,
                      steps_per_epoch=100,
                      validation_data=validation_data,
                      callbacks=callbacks)

            try:
                model.load_weights(path_checkpoint)
                print('Success')
            except Exception as error:
                print("Error trying to load checkpoint.")
                print(error)

            # Input-signals for the model.
            x = np.expand_dims(x_test_scaled, axis=0)

            # Use the model to predict the output-signals.
            y_pred = model.predict(x)
            y_pred_rescaled = y_scaler.inverse_transform(y_pred[0])

            temp_mse = np.sqrt(mean_squared_error(y_test, y_pred_rescaled))
            model_mse.append(temp_mse.item())
            print('model finished')

        print('split finished')

        # Average over all models of this split. Averaging temp_mse would
        # only reflect the last model trained.
        final_mse.append(np.mean(model_mse))

    return np.array(final_mse)
def model(self):
    """Download the weather data, prepare it and train a GRU network.

    The targets are the 'Temp', 'WindSpeed' and 'Pressure' columns of
    'Odense', shifted back by shift_steps rows. The first 90% of the rows
    are used for training and the remainder for validation.

    Returns:
        None
    """
    # NOTE(review): this may have no effect if TensorFlow was already
    # imported before the call - confirm.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    weather.maybe_download_and_extract()
    cities = weather.cities
    df = weather.load_resampled_data()

    # Remove the two pressure columns and add calendar features.
    for city in ('Esbjerg', 'Roskilde'):
        df.drop((city, 'Pressure'), axis=1, inplace=True)
    df['Various', 'Day'] = df.index.dayofyear
    df['Various', 'Hour'] = df.index.hour

    target_city = 'Odense'
    target_names = ['Temp', 'WindSpeed', 'Pressure']

    # NOTE(review): one "day" is taken to be 2 rows here - confirm this
    # matches the sampling rate of the resampled data.
    shift_days = 1
    shift_steps = shift_days * 2
    df_targets = df[target_city][target_names].shift(-shift_steps)

    # Drop the trailing rows for which the shifted targets do not exist.
    x_data = df.values[0:-shift_steps]
    print(type(x_data))
    print("Shape:", x_data.shape)
    y_data = df_targets.values[:-shift_steps]

    # Positional 90/10 split.
    num_data = len(x_data)
    train_split = 0.9
    num_train = int(train_split * num_data)
    num_test = num_data - num_train

    x_train, x_test = x_data[0:num_train], x_data[num_train:]
    print(len(x_train) + len(x_test))
    y_train, y_test = y_data[0:num_train], y_data[num_train:]
    print(len(y_train) + len(y_test))

    num_x_signals = x_data.shape[1]
    num_y_signals = y_data.shape[1]

    print("Min:", np.min(x_train))
    print("Max:", np.max(x_train))

    # Scalers are fitted on the training rows only.
    x_scaler = MinMaxScaler()
    x_train_scaled = x_scaler.fit_transform(x_train)
    print("Min:", np.min(x_train_scaled))
    print("Max:", np.max(x_train_scaled))
    x_test_scaled = x_scaler.transform(x_test)

    y_scaler = MinMaxScaler()
    y_train_scaled = y_scaler.fit_transform(y_train)
    y_test_scaled = y_scaler.transform(y_test)

    print(x_train_scaled.shape)
    print(y_train_scaled.shape)

    batch_size = 64
    sequence_length = 24 * 7 * 8

    generator = self.batch_generator(batch_size, sequence_length,
                                     num_x_signals, num_y_signals,
                                     num_train, x_train_scaled,
                                     y_train_scaled)

    # Draw one batch just to report its shapes.
    x_batch, y_batch = next(generator)
    print(x_batch.shape)
    print(y_batch.shape)

    # The whole test part is one sequence with a leading batch axis.
    x_validation = np.expand_dims(x_test_scaled, axis=0)
    y_validation = np.expand_dims(y_test_scaled, axis=0)
    validation_data = (x_validation, y_validation)

    # NOTE(review): '/:GPU:0' is an unusual device string - confirm it
    # selects the intended device.
    with tf.device('/:GPU:0'):
        model = Sequential()
        recurrent = GRU(units=512,
                        return_sequences=True,
                        input_shape=(None, num_x_signals))
        model.add(recurrent)
        model.add(Dense(num_y_signals, activation='sigmoid'))

        # Disabled alternative output layer (linear, narrow uniform init).
        if False:
            from tensorflow.python.keras.initializers import RandomUniform

            # Maybe use lower init-ranges.
            init = RandomUniform(minval=-0.05, maxval=0.05)

            model.add(
                Dense(num_y_signals,
                      activation='linear',
                      kernel_initializer=init))

        model.compile(loss=self.loss_mse_warmup,
                      optimizer=RMSprop(lr=1e-3))
        model.summary()

        # Checkpoint on improvement, stop after 5 stale epochs, log to
        # TensorBoard and cut the learning-rate (floor 1e-4) on any stall.
        path_checkpoint = '/tmp/hvass_checkpoint.keras'
        checkpoint_cb = ModelCheckpoint(filepath=path_checkpoint,
                                        monitor='val_loss',
                                        verbose=1,
                                        save_weights_only=True,
                                        save_best_only=True)
        early_stopping_cb = EarlyStopping(monitor='val_loss',
                                          patience=5,
                                          verbose=1)
        tensorboard_cb = TensorBoard(log_dir='/tmp/hvass_logs/',
                                     histogram_freq=0,
                                     write_graph=False)
        reduce_lr_cb = ReduceLROnPlateau(monitor='val_loss',
                                         factor=0.1,
                                         min_lr=1e-4,
                                         patience=0,
                                         verbose=1)

        model.fit(x=generator,
                  epochs=20,
                  steps_per_epoch=100,
                  validation_data=validation_data,
                  callbacks=[
                      early_stopping_cb, checkpoint_cb, tensorboard_cb,
                      reduce_lr_cb
                  ])
from keras.models import Sequential from keras.layers import SimpleRNN, Dense from keras.layers import LSTM from keras import optimizers from tensorflow.python.keras.initializers import RandomUniform from keras.optimizers import SGD from keras.optimizers import Adam from sklearn import preprocessing from keras.layers import LSTM, Dense, Bidirectional, Input, Dropout model = Sequential() model.add(SimpleRNN(128, input_shape=(FL, 1), return_sequences=True)) model.add(SimpleRNN(128, input_shape=(FL, 1))) model.add(Dense(32, activation="sigmoid")) model.add( Dense(1, kernel_initializer=RandomUniform(minval=-0, maxval=0), activation="tanh")) model.compile(loss='mse', optimizer=Adam(lr=0.0002)) history = model.fit(X, Y_train, nb_epoch=100, batch_size=256, verbose=1) M = (model.predict(Xv)) Mi = scaler.inverse_transform(M) Vy = scaler.inverse_transform(Y_test) T = 0 R = 0 for i in range(0, len(Vy)): if Mi[i] > 0.0000 and Mi[i] != 0 and Vy[i] != 0: T = T + 1 if Vy[i] > 0:
plt.plot(y_batch[6])

# Now, we are prepared to implement our RNN model

# In[65]:

# The number of hidden units (200 here) can be tuned during testing.
# A single GRU layer reads sequences of arbitrary length with num_f values
# per step; the dense head emits one sigmoid output per time-step and
# starts from small uniform weights in [-0.01, 0.01].
model = Sequential([
    GRU(units=200, return_sequences=True, input_shape=(None, num_f)),
    Dense(1,
          activation='sigmoid',
          kernel_initializer=RandomUniform(-0.01, 0.01)),
])

# Compile Time!!!!

# In[70]:

# Define the optimization function: Adam with custom moment decay rates.
adam_opt = Adam(
    lr=0.001,
    beta_1=0.85,
    beta_2=0.95,
    epsilon=None,
    decay=0.0,
    amsgrad=False,
)
model.compile(optimizer=adam_opt, loss="mse", metrics=['mae', 'acc'])

# In[71]: