class RecurrentPPONet(ConvolutionalPPONet):
    """Convolutional PPO network with a stateful LSTM in front of both heads.

    The LSTM consumes the output of ``self.vision_net_layer`` and feeds the
    policy head and the value head.  Because the layer is created with
    ``stateful=True``, its state can be read with `current_states` and
    overwritten or cleared with `reset_states`.
    """

    # Number of units of the LSTM layer created in `init_extra_layers`.
    num_rnn_units = 256

    # noinspection PyAttributeOutsideInit
    def init_extra_layers(self):
        """Create the parent's extra layers and then the recurrent layer."""
        super().init_extra_layers()
        lstm_options = dict(
            return_sequences=True,
            implementation=2,
            stateful=True,
            recurrent_activation='sigmoid',
            name=self.name + 'rnn',
        )
        self.rnn_layer = LSTM(self.num_rnn_units, **lstm_options)

    def make_main_model(self):
        """Wire observation input -> vision net -> LSTM -> policy/value heads.

        Returns:
            A ``Model`` with ``self.obs_input`` as its only input and
            ``[policy_output, value_output]`` as outputs, where the value
            output has passed through ``self.norm_value_output_layer``.
        """
        features = self.vision_net_layer(self.obs_input)
        recurrent_features = self.rnn_layer(features)

        policy_head = self.policy_output_layer(recurrent_features)
        raw_value_head = self.value_output_layer(recurrent_features)
        value_head = self.norm_value_output_layer(raw_value_head)

        return Model(
            inputs=[self.obs_input],
            outputs=[policy_head, value_head],
            name=self.name + 'rnn_model')

    def current_states(self) -> Any:
        """Return the current values of the LSTM layer's state tensors."""
        state_tensors = self.rnn_layer.states
        return K.batch_get_value(state_tensors)

    def reset_states(self, states=None):
        """Forward `states` to the LSTM layer's ``reset_states``."""
        self.rnn_layer.reset_states(states)
def _build_keras_rnn(s):
    """Build and compile the Keras model described by the settings object `s`.

    The model is ``recurrent layer -> [Dropout(0.5)] -> Dense(1)``, compiled
    with ``adam`` and the loss named by ``s.loss``.

    Returns:
        ``(rnn, rnn_layer)``: the compiled ``Sequential`` model and its
        recurrent layer (needed by the caller to reset the layer state).

    Raises:
        Exception: if ``s.rnn_type`` is neither ``"lstm"`` nor ``"gru"``.
    """
    if s.rnn_type == "lstm":
        layer_cls = LSTM
    elif s.rnn_type == "gru":
        layer_cls = GRU
    else:
        # Previously this fell through and left `rnn_layer` unbound, which
        # only surfaced later as a NameError when resetting states.
        raise Exception("Unknown rnn_type " + str(s.rnn_type))

    rnn = Sequential()
    rnn_layer = layer_cls(s.nodes,
                          input_shape=s.rnn_input_shape,
                          batch_size=s.rnn_batch_size,
                          stateful=s.stateful,
                          return_sequences=s.return_sequences)
    rnn.add(rnn_layer)
    if s.use_dropout:
        rnn.add(Dropout(0.5))
    rnn.add(Dense(1))
    opt = adam(lr=s.lr, decay=0.0, epsilon=s.adam_eps)
    rnn.compile(loss=s.loss, optimizer=opt)
    return rnn, rnn_layer


def _merge_extra_features(dp, sequence, seq_norm, s):
    """Normalize the non-target columns and copy them into the AN frames.

    Does nothing when there is only one feature column.  `sequence` and
    `seq_norm` are modified in place.
    """
    if s.feature_count <= 1:
        return
    dp.normalize(sequence, s.nTrain, columns=range(1, s.feature_count))
    # Offset of the first frame of `seq_norm` within `sequence`.
    start = (sequence.shape[0] - seq_norm.shape[0]
             - s.lookback - s.predictionStep + 1)
    frame_len = seq_norm.shape[1]
    for k in range(seq_norm.shape[0]):
        seq_norm[k, :, 1:s.feature_count] = sequence[
            start + k:start + k + frame_len, 1:s.feature_count]


def run_gru(s):
    """Train an LSTM/GRU on a data set, predict it, and report error metrics.

    Steps, all driven by attributes of the settings object `s`:

    1. Build the Keras model (`s.rnn_type`, `s.nodes`, ...).
    2. Load the data set, trim it to `s.limit_to` rows and
       `s.feature_count` columns, and normalize it according to
       `s.normalization_type` (``'default'``, ``'windowed'`` or ``'AN'``).
    3. Fit on the first `s.nTrain` samples.
    4. Walk over the remaining samples producing predictions; when
       `s.online` is set, retrain every `s.retrain_interval` samples
       (optionally rebuilding the model if `s.reset_on_retrain`).
    5. Denormalize, save the result via ``DataProcessor.saveResultToFile``
       and compute NRMSE / MAE / MAPE / MASE for every offset in
       `s.ignore_for_error`.

    Returns:
        The MASE computed for the last entry of `s.ignore_for_error`, or
        ``None`` when that collection is empty.

    Raises:
        Exception: for an unknown `s.implementation`, for `s.use_binary`
            (not implemented for Keras), for an unknown `s.rnn_type`, or
            for an unsupported `s.normalization_type`.
    """
    s.print_settings()
    start_time = timeit.default_timer()

    if s.implementation != "keras":
        raise Exception("Unknown implementation " + s.implementation)
    if s.use_binary:
        raise Exception("Binary Keras not implemented")
    rnn, rnn_layer = _build_keras_rnn(s)
    if s.max_verbosity > 0:
        print(rnn.summary())

    sequence = readDataSet(s.dataSet, s.dataSetDetailed, s).values
    if s.limit_to:
        sequence = sequence[:s.limit_to]

    # Get rid of unneeded columns
    sequence = sequence[:, 0:s.feature_count]

    # We need to leave some values unpredicted in front so that
    #   - we can fill the lookback window for each prediction
    #   - we can get the value from 1 season earlier for MASE
    # --> Don't use the first `front_buffer` values as prediction
    # --> Independent from `prediction_step`, so the first actual value
    #     predicted is `front_buffer` plus however many steps the
    #     `prediction_step` is higher than 1.
    # In other words, the most recent X-value for the first prediction will
    # be the final value in the `front_buffer`.
    first_prediction_index = s.front_buffer + s.predictionStep - 1

    # Grab the targets now to avoid having to denormalize them later.
    targetInput = sequence[first_prediction_index:, 0].copy()

    dp = DataProcessor()
    if s.normalization_type == 'default':
        (meanSeq, stdSeq) = dp.normalize(
            sequence, s.nTrain if s.cutoff_normalize else len(sequence))
    elif s.normalization_type == 'windowed':
        dp.windowed_normalize(sequence, columns=[0])
        if s.feature_count > 1:
            dp.normalize(sequence, s.nTrain,
                         columns=range(1, s.feature_count))
    elif s.normalization_type == 'AN':
        an = AdaptiveNormalizer(s.lookback, s.lookback + s.predictionStep)
        an.set_pruning(False)
        an.set_source_data(sequence, s.nTrain)
        an.do_ma('s')
        an.do_stationary()
        an.remove_outliers()
        seq_norm = an.do_adaptive_normalize()
        _merge_extra_features(dp, sequence, seq_norm, s)
    else:
        raise Exception("Unsupported normalization type: "
                        + s.normalization_type)

    if s.normalization_type != "AN":
        # Default and windowed change the sequence itself but still require
        # creating lookback frames.
        allX = getX(sequence, s)
        allY = sequence[first_prediction_index:, 0]
    else:
        # AN creates a new array but takes care of lookback internally.
        allX = seq_norm[:, 0:-s.predictionStep]
        allY = np.reshape(seq_norm[:, -1, 0], (-1, ))

    # Initialize all predictions to NaN.
    predictedInput = np.full((len(allY), ), np.nan)

    trainX = np.reshape(allX[:s.nTrain], s.actual_input_shape_train)
    trainY = np.reshape(allY[:s.nTrain], s.actual_output_shape_train)

    rnn.fit(trainX, trainY,
            epochs=s.epochs,
            batch_size=s.batch_size,
            verbose=min(s.max_verbosity, 2),
            shuffle=not s.stateful)
    if s.stateful:
        rnn_layer.reset_states()

    latestStart = None       # start of the last AN re-normalization window
    do_non_lookback = True   # non-lookback mode predicts in chunks
    latest_onego = 0         # end of the last chunk predicted in one go
    first_index = s.nTrain + s.predictionStep

    for i in tqdm(range(first_index, len(allX)),
                  disable=s.max_verbosity == 0):
        # `s.online` is tested first so that offline runs never evaluate
        # `i % s.retrain_interval` (which fails for a zero/unset interval).
        if s.online and i % s.retrain_interval == 0 and i > first_index:
            do_non_lookback = True
            if s.normalization_type == 'AN':
                # Denormalize what was predicted since the last retrain
                # before the normalizer is re-fitted on newer data.
                predictedInput = np.array(
                    an.do_adaptive_denormalize(
                        predictedInput,
                        therange=(i - s.retrain_interval, i)))
                latestStart = i
                an.set_ignore_first_n(i - s.nTrain - s.predictionStep)
                an.do_ma('s')
                an.do_stationary()
                an.remove_outliers()
                seq_norm = an.do_adaptive_normalize()
                _merge_extra_features(dp, sequence, seq_norm, s)
                allX = seq_norm[:, 0:-s.predictionStep]
                allY = np.reshape(seq_norm[:, -1, 0], (-1, ))

            # Retrain on the most recent `nTrain` samples whose targets are
            # already known at step `i`.
            window = slice(i - s.nTrain - s.predictionStep,
                           i - s.predictionStep)
            trainX = np.reshape(allX[window], s.actual_input_shape_train)
            trainY = np.reshape(allY[window], s.actual_output_shape_train)

            if s.reset_on_retrain:
                rnn, rnn_layer = _build_keras_rnn(s)
            rnn.fit(trainX, trainY,
                    epochs=s.epochs_retrain,
                    batch_size=s.batch_size,
                    verbose=min(s.max_verbosity, 2),
                    shuffle=not s.stateful)
            if s.stateful:
                rnn_layer.reset_states()

        if s.lookback:
            predictedInput[i] = rnn.predict(
                np.reshape(allX[i], s.predict_input_shape))
        elif do_non_lookback:
            do_non_lookback = False
            if s.online:
                # Predict up to (not including) the next retrain point.
                up_to = min(allX.shape[0],
                            i - (i % s.retrain_interval)
                            + s.retrain_interval)
            else:
                up_to = allX.shape[0]
            start = 0 if s.refeed_on_retrain else latest_onego
            new_predicts = rnn.predict(
                np.reshape(allX[start:up_to], (1, -1, s.x_dims)))
            new_predicts = np.reshape(new_predicts,
                                      (new_predicts.shape[1], ))
            # Only the tail corresponds to samples i..up_to; the earlier
            # part was fed solely to warm up the network.
            predictedInput[i:up_to] = new_predicts[-(up_to - i):]
            latest_onego = up_to

    # Never report predictions for the initial training range.
    predictedInput[:first_index] = np.nan

    if s.normalization_type == 'default':
        predictedInput = dp.denormalize(predictedInput,
                                        meanSeq[0], stdSeq[0])
    elif s.normalization_type == 'windowed':
        dp.windowed_denormalize(predictedInput, pred_step=s.predictionStep)
    elif s.normalization_type == 'AN':
        if latestStart:
            predictedInput = np.array(
                an.do_adaptive_denormalize(
                    predictedInput,
                    therange=(latestStart, len(predictedInput))))
        else:
            predictedInput = np.array(
                an.do_adaptive_denormalize(predictedInput))
        if an.pruning:
            targetInput = np.delete(targetInput, an.deletes)

    if s.max_verbosity > 0:
        print("Final time {}".format(timeit.default_timer() - start_time))

    dp.saveResultToFile(s.dataSet, predictedInput, targetInput, 'gru',
                        s.predictionStep, s.max_verbosity)

    # Imported here rather than at module level, as in the original code.
    from plot import computeSquareDeviation

    mase = None  # stays None when `s.ignore_for_error` is empty
    for skipTrain in s.ignore_for_error:
        squareDeviation = computeSquareDeviation(predictedInput, targetInput)
        squareDeviation[:skipTrain] = None
        nrmse = (np.sqrt(np.nanmean(squareDeviation))
                 / np.nanstd(targetInput))
        if s.max_verbosity > 0:
            print(" {} NRMSE {}".format(s.nodes, nrmse))

        mae = np.nanmean(np.abs(targetInput - predictedInput))
        if s.max_verbosity > 0:
            print("MAE {}".format(mae))

        mape = errors.get_mape(predictedInput, targetInput, skipTrain)
        if s.max_verbosity > 0:
            print("MAPE {}".format(mape))

        mase = errors.get_mase(predictedInput, targetInput,
                               np.roll(targetInput, s.season), skipTrain)
        if s.max_verbosity > 0:
            print("MASE {}".format(mase))

    return mase
class WorldEnvModel(StatefulNet):
    """
    MDN-RNN model (a combination of a Mixture Density Network and an RNN)
    used to predict future observations in the environment.

    The implementation is based on papers
    [Generating Sequences With Recurrent Neural Networks]
    (https://arxiv.org/pdf/1308.0850/)
    [A Neural Representation of Sketch Drawings]
    (https://arxiv.org/pdf/1704.03477/)
    except it uses just a simple diagonal covariance matrix.
    """

    def __init__(self,
                 name: str,
                 batch_size: int,
                 time_steps: Optional[int],
                 num_actions: int,
                 latent_dim_size: int,
                 num_rnn_units: int,
                 mixture_size: int,
                 temperature: float,
                 optimizer: Optional[optimizers.Optimizer]=None):
        """Build the model graph and, if `optimizer` is given, compile it.

        Args:
            name: prefix prepended to the name of every layer and the model.
            batch_size: fixed batch size of the observation/action inputs
                (the LSTM is created with ``stateful=True``).
            time_steps: length of the time axis, or ``None``.
            num_actions: number of classes used to one-hot encode actions.
            latent_dim_size: size of one encoded observation vector.
            num_rnn_units: number of LSTM units.
            mixture_size: number of Gaussian components in the mixture.
            temperature: divisor applied to the raw mixture weights before
                the softmax.
            optimizer: when not ``None``, the mixture negative
                log-likelihood is added as a model loss and the model is
                compiled with this optimizer.
        """
        encoded_obs = Input(
            batch_shape=(batch_size, time_steps, latent_dim_size),
            name=name + 'z_input')
        actions = Input(
            batch_shape=(batch_size, time_steps,),
            dtype='int32',
            name=name + 'a_input')
        actions_categorical = Lambda(
            lambda x: K.one_hot(x, num_actions),
            output_shape=(time_steps, num_actions))(actions)
        rnn_input = Concatenate(
            name=name + 'merged_input')([encoded_obs, actions_categorical])

        self._rnn_layer = LSTM(num_rnn_units,
                               return_sequences=True,
                               stateful=True,
                               name=name + 'rnn')
        rnn_outputs = self._rnn_layer(rnn_input)

        temperature = K.constant(temperature)
        # One parameter per (mixture component, latent dimension) pair.
        param_group_size = latent_dim_size * mixture_size

        expected_z = Input(
            shape=(time_steps, latent_dim_size,),
            name=name + 'z_input_expected')

        mu = TimeDistributed(
            Dense(param_group_size), name=name + 'mu')(rnn_outputs)
        mu = TimeDistributed(Reshape((mixture_size, latent_dim_size)))(mu)

        log_variances = TimeDistributed(
            Dense(param_group_size), name=name + 'log_var')(rnn_outputs)
        log_variances = TimeDistributed(
            Reshape((mixture_size, latent_dim_size)))(log_variances)
        variances = TimeDistributed(
            Lambda(K.exp, output_shape=(mixture_size, latent_dim_size)),
            name=name + 'variances')(log_variances)

        raw_mixture_weights = TimeDistributed(
            Dense(mixture_size),
            name=name + 'raw_mix_weights')(rnn_outputs)
        # This Lambda is NOT wrapped in TimeDistributed, so its per-sample
        # output shape has to include the time axis.
        raw_mixture_weights = Lambda(
            lambda x: x / temperature,
            output_shape=(time_steps, mixture_size),
            name=name + 'apply_temp')(raw_mixture_weights)
        mixture_weights = Activation('softmax')(raw_mixture_weights)

        predicted_reward = TimeDistributed(
            Dense(1,), name=name + 'predicted_reward')(rnn_outputs)
        is_done = TimeDistributed(
            Dense(1, activation='sigmoid'),
            name=name + 'predicted_done')(rnn_outputs)

        model = Model(
            name=name + 'model',
            inputs=EnvModelInput(
                encoded_obs=encoded_obs,
                actions=actions,
                expected_z=expected_z),
            outputs=EnvModelOutput(
                mu=mu,
                variances=variances,
                mixture_weights=mixture_weights,
                rnn_outputs=rnn_outputs,
                predicted_reward=predicted_reward,
                is_done=is_done))

        if optimizer is not None:
            # Add a mixture axis so the targets broadcast against `mu`.
            expected_z_expanded = K.expand_dims(expected_z, 2)

            # The loss as it would look like formally, as shown in the
            # papers:
            # pdf = (
            #     K.exp(-0.5 * K.sum(K.square(expected_z_expanded - mu)
            #                        / variances, axis=-1))
            #     / K.sqrt(np.power(2 * np.pi, latent_dim_size)
            #              * K.prod(variances, axis=-1)))
            # mixture_pdf = K.sum(mixture_weights * pdf, axis=-1)
            # mixture_loss = K.mean(-K.sum(K.log(mixture_pdf), axis=-1))

            # The same loss re-written to be more numerically stable
            mixture_loss = K.mean(
                -K.sum(
                    K.logsumexp(
                        K.log(mixture_weights)
                        - 0.5 * K.sum(K.square(expected_z_expanded - mu)
                                      / variances, axis=-1)
                        - 0.5 * latent_dim_size * K.log(2 * np.pi)
                        - 0.5 * K.sum(log_variances, axis=-1),
                        axis=-1),
                    axis=-1))

            model.add_loss(mixture_loss)
            # One entry per model output, in `EnvModelOutput` order as
            # passed above; the first four are covered by `add_loss`.
            model.compile(optimizer, loss=[
                None,
                None,
                None,
                None,
                losses.mean_squared_error,  # loss for reward predictions
                losses.binary_crossentropy,  # loss for the "done" signal
            ])

        self.model = model

    @staticmethod
    def draw_samples(env_output: EnvModelOutput)->EnvModelPrediction:
        """Sample one observation per (batch, time step) from the mixture.

        For every position a mixture component is picked with probability
        given by ``env_output.mixture_weights`` and a sample is drawn from
        that component's Gaussian (mean ``mu``, per-dimension ``variances``).

        Returns:
            An ``EnvModelPrediction`` holding the sampled observations and
            ``is_done`` thresholded at 0.5, reshaped to
            ``(batch_size, time_steps)``.
        """
        batch_size, time_steps, mixtures = env_output.mixture_weights.shape
        latent_dim_size = env_output.mu.shape[-1]

        observations = np.zeros((batch_size, time_steps, latent_dim_size))
        for b in range(batch_size):
            for t in range(time_steps):
                mixture = np.random.choice(
                    mixtures, p=env_output.mixture_weights[b, t])
                means = env_output.mu[b, t, mixture]
                variances = env_output.variances[b, t, mixture]
                samples = (
                    means
                    + np.random.standard_normal(means.shape)
                    * np.sqrt(variances)
                )
                observations[b, t] = samples

        return EnvModelPrediction(
            observations=observations,
            is_done=(env_output.is_done > 0.5).reshape(batch_size,
                                                       time_steps))

    def current_states(self):
        """Return the current values of the LSTM layer's state tensors."""
        return K.batch_get_value(self._rnn_layer.states)

    def reset_states(self, states=None):
        """Forward `states` to the LSTM layer's ``reset_states``."""
        self._rnn_layer.reset_states(states)

    @staticmethod
    def state_for_controller(states):
        """Return the first element of `states`."""
        return states[0]
Here is a complete example:'''
paragraph1 = np.random.random((20, 10, 50)).astype(np.float32)
paragraph2 = np.random.random((20, 10, 50)).astype(np.float32)
paragraph3 = np.random.random((20, 10, 50)).astype(np.float32)

lstm_layer = LSTM(64, stateful=True)
output = lstm_layer(paragraph1)
output = lstm_layer(paragraph2)
output = lstm_layer(paragraph3)

# reset_states() will reset the cached state to the original initial_state.
# If no initial_state was provided, zero-states will be used by default.
lstm_layer.reset_states()

#================================================
'''RNN State Reuse

The recorded states of an RNN layer are not included in `layer.weights`.
If you would like to reuse the state from an RNN layer, you can retrieve
the state values via `layer.states` and use them as the initial state for a
new layer, either via the Keras functional API, like
`new_layer(inputs, initial_state=layer.states)`, or via model subclassing.

Please also note that a Sequential model cannot be used in this case: it
only supports layers with a single input and a single output, and the extra
initial-state input makes it impossible to use here.'''
paragraph1 = np.random.random((20, 10, 50)).astype(np.float32)
paragraph2 = np.random.random((20, 10, 50)).astype(np.float32)