def generate_unconditionally(random_seed=1):
    np.random.seed(random_seed)
    # Set up the generator
    inputs = Input(shape=(1, 3))
    x = LSTM(256, return_sequences=True, batch_input_shape=(1, 1, 3))(inputs)
    x = LSTM(256)(x)
    outputs = mdn.MDN(3, 10)(x)
    generator = Model(inputs=inputs, outputs=outputs)
    generator.compile(loss=mdn.get_mixture_loss_func(3, 10),
                      optimizer=keras.optimizers.Adam())
    generator.load_weights('model_weights.h5')

    predictions = []
    stroke_pt = np.asarray([1, 0, 0], dtype=np.float32)  # start point
    predictions.append(stroke_pt)
    for i in range(400):
        stroke_pt = mdn.sample_from_output(
            generator.predict(stroke_pt.reshape(1, 1, 3))[0], 3, 10)
        predictions.append(stroke_pt.reshape((3,)))
    predictions = np.array(predictions, dtype=np.float32)

    for i in range(len(predictions)):
        predictions[i][0] = (predictions[i][0] > 0.5) * 1
        predictions[i][1] = predictions[i][1] * std_x + x_mean
        predictions[i][2] = predictions[i][2] * std_y + y_mean
    return predictions


#stroke = generate_unconditionally()
#plot_stroke(stroke)
def fit_lstm(train, batch_size=1, nb_epoch=5, lstm_neurons=1, timesteps=1,
             dense_neurons=1, mdn_output=1, mdn_Nmixes=1):
    X, y = train[:, 0:-1], train[:, -1]
    #print(X.shape)
    X = X.reshape(X.shape[0], timesteps, X.shape[1])

    model = Sequential()
    model.add(LSTM(lstm_neurons,
                   batch_input_shape=(batch_size, X.shape[1], X.shape[2]),
                   stateful=True))
    model.add(Dense(dense_neurons))
    model.add(mdn.MDN(mdn_output, mdn_Nmixes))

    adam = Adam(lr=0.005, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(loss=mdn.get_mixture_loss_func(mdn_output, mdn_Nmixes), optimizer=adam)
    print(model.summary())

    for i in range(nb_epoch):
        model.fit(X, y, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
        model.reset_states()
    return model
def build_model(seq_len=30, hidden_units=256, num_mixtures=5, layers=2,
                out_dim=2, time_dist=True, inference=False, compile_model=True,
                print_summary=True):
    """Builds an EMPI MDRNN model for training or inference.

    Keyword Arguments:
    seq_len : sequence length to unroll
    hidden_units : number of LSTM units in each layer
    num_mixtures : number of mixture components (5-10 is good)
    layers : number of layers (2 is good)
    out_dim : number of dimensions for the model = number of degrees of freedom + 1 (time)
    time_dist : time distributed or not (default True)
    inference : inference network or training (default False)
    compile_model : compiles the model (default True)
    print_summary : print summary after creating the model (default True)
    """
    print("Building EMPI Model...")
    # Set up training mode
    stateful = False
    batch_shape = None
    # Set up inference mode.
    if inference:
        stateful = True
        batch_shape = (1, 1, out_dim)

    inputs = keras.layers.Input(shape=(seq_len, out_dim), name='inputs',
                                batch_shape=batch_shape)
    lstm_in = inputs  # starter input for lstm
    for layer_i in range(layers):
        ret_seq = True
        if (layer_i == layers - 1) and not time_dist:
            # return sequences false if last layer, and not time distributed.
            ret_seq = False
        lstm_out = keras.layers.LSTM(hidden_units, name='lstm' + str(layer_i),
                                     return_sequences=ret_seq,
                                     stateful=stateful)(lstm_in)
        lstm_in = lstm_out

    mdn_layer = mdn.MDN(out_dim, num_mixtures, name='mdn_outputs')
    if time_dist:
        mdn_layer = keras.layers.TimeDistributed(mdn_layer, name='td_mdn')
    mdn_out = mdn_layer(lstm_out)  # apply mdn
    model = keras.models.Model(inputs=inputs, outputs=mdn_out)

    if compile_model:
        loss_func = mdn.get_mixture_loss_func(out_dim, num_mixtures)
        optimizer = keras.optimizers.Adam()  # keras.optimizers.Adam(lr=0.0001)
        model.compile(loss=loss_func, optimizer=optimizer)

    if print_summary:
        model.summary()
    return model
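# Hedged usage sketch (not part of the original source): how a model built with
# build_model() above could be used for step-by-step inference. The weight file name,
# the priming vector and the sampling loop are assumptions; mdn.sample_from_output is
# the sampling helper provided by the keras-mdn-layer package.
import numpy as np
import mdn

def sample_sketch(out_dim=2, num_mixtures=5, steps=100):
    # Build a stateful, batch-size-1 decoder and load previously trained weights.
    decoder = build_model(seq_len=1, hidden_units=256, num_mixtures=num_mixtures,
                          layers=2, out_dim=out_dim, time_dist=False,
                          inference=True, compile_model=False, print_summary=False)
    decoder.load_weights('empi_mdrnn_weights.h5')  # hypothetical file name
    # Feed one step at a time; the stateful LSTMs carry the history between calls.
    prev = np.zeros((1, 1, out_dim), dtype=np.float32)
    outputs = []
    for _ in range(steps):
        params = decoder.predict(prev)[0]
        prev = mdn.sample_from_output(params, out_dim, num_mixtures).reshape(1, 1, out_dim)
        outputs.append(prev.reshape(out_dim))
    return np.array(outputs)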
def rnn_loss(z_true, z_pred):
    assert z_true.shape[-1] == self.out_dim
    assert z_pred.shape[-1] == (2 * self.out_dim + 1) * self.n_mixes
    z_loss = mdn.get_mixture_loss_func(self.out_dim, self.n_mixes)(z_true, z_pred)
    return z_loss
def __init__(
        self,
        input_dim=35,
        lstm_nodes=256,
        output_dim=32,
        num_mixtures=5,
        num_timesteps=999,
        hidden_units=None,
        batch_size=100,
        grad_clip=1.0,
        initial_learning_rate=0.001,
        end_learning_rate=0.00001,
        epochs=1,
        batch_per_epoch=1,
        load_model=False,
        results_dir=None,
):
    super(Memory, self).__init__(name="Memory")
    decay_steps = epochs * batch_per_epoch
    learning_rate = tf.keras.optimizers.schedules.PolynomialDecay(
        initial_learning_rate=initial_learning_rate,
        decay_steps=decay_steps,
        end_learning_rate=end_learning_rate)
    # Pass the polynomial decay schedule to the optimizer.
    self.optimizer = tf.keras.optimizers.Adam(learning_rate, clipvalue=grad_clip)
    self.loss_function = mdn.get_mixture_loss_func(output_dim, num_mixtures)

    self.num_timesteps = num_timesteps
    self.lstm_nodes = lstm_nodes
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.num_mixtures = num_mixtures

    #lstm_cell = tf.keras.layers.LSTMCell(lstm_nodes, kernel_initializer='glorot_uniform', recurrent_initializer='glorot_uniform', bias_initializer='zeros', name='lstm_cell')
    self.lstm = tf.keras.layers.LSTM(lstm_nodes,
                                     return_sequences=True,
                                     return_state=True,
                                     input_shape=(num_timesteps, input_dim),
                                     name='lstm_layer')
    if hidden_units is None:
        self.hidden_layers = []
    else:
        self.hidden_layers = [
            tf.keras.layers.Dense(n_units, activation='relu')
            for n_units in hidden_units
        ]
    self.mdn_out = tf.keras.layers.TimeDistributed(
        mdn.MDN(output_dim, num_mixtures, name='mdn_outputs'), name='td_mdn')

    if load_model:
        self.load_weights(results_dir)
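# Hedged sketch (not in the original source): how the layers declared in the Memory
# __init__ above would typically be wired in the subclassed model's call(). The method
# body and the decision to return the LSTM states are assumptions.
def call(self, inputs, training=False):
    # LSTM returns the full sequence plus the final hidden and cell states.
    x, state_h, state_c = self.lstm(inputs)            # x: (batch, timesteps, lstm_nodes)
    for layer in self.hidden_layers:
        x = layer(x)                                    # Dense acts on the last axis, per timestep
    mixture_params = self.mdn_out(x)                    # (batch, timesteps, (2*output_dim+1)*num_mixtures)
    return mixture_params, state_h, state_c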
def cnn_model_3d_mdn(self, voxel_dim, deviation_channels, num_of_mixtures=5):
    """Build the 3D model with a Mixture Density Network output that gives the
    parameters of a Gaussian Mixture Model, to be used if the system is expected to
    be collinear (Multi-Stage Assembly Systems), i.e. a single input can have
    multiple outputs.

    The functions for predicting and sampling from MDN.py need to be used when
    deploying an MDN based model.

    Refer to https://publications.aston.ac.uk/id/eprint/373/1/NCRG_94_004.pdf for
    more details on how an MDN model works.

    Refer to https://arxiv.org/pdf/1709.02249.pdf to understand how an MDN model can
    be leveraged to estimate the epistemic and aleatoric uncertainty present in
    manufacturing systems based on the data collected.

    :param voxel_dim: The voxel dimension of the input, required to build the input to the 3D CNN model
    :type voxel_dim: int (required)

    :param deviation_channels: The number of deviation channels in the input structure, required to build the input to the 3D CNN model
    :type deviation_channels: int (required)

    :param num_of_mixtures: The number of mixtures in the Gaussian Mixture Model output, defaults to 5, can be increased if higher collinearity is expected
    :type num_of_mixtures: int
    """
    assert self.model_type == "regression", "Mixture Density Network Should be a Regression Model"

    from keras.layers import Conv3D, MaxPool3D, Flatten, Dense
    from keras.models import Sequential
    from keras import regularizers
    import mdn

    model = Sequential()
    model.add(Conv3D(32, kernel_size=(5, 5, 5), strides=(2, 2, 2), activation='relu',
                     input_shape=(voxel_dim, voxel_dim, voxel_dim, deviation_channels)))
    model.add(Conv3D(32, kernel_size=(4, 4, 4), strides=(2, 2, 2), activation='relu'))
    model.add(Conv3D(32, kernel_size=(3, 3, 3), strides=(1, 1, 1), activation='relu'))
    model.add(MaxPool3D(pool_size=(2, 2, 2)))
    model.add(Flatten())
    model.add(Dense(128, kernel_regularizer=regularizers.l2(0.02), activation='relu'))
    #model.add(Dropout(0.3))
    model.add(Dense(self.output_dimension, activation=final_layer_avt))  # final_layer_avt comes from the enclosing module
    model.add(mdn.MDN(self.output_dimension, num_of_mixtures))
    model.compile(loss=mdn.get_mixture_loss_func(self.output_dimension, num_of_mixtures),
                  optimizer='adam')

    print("3D CNN Mixture Density Network model successfully compiled")
    return model
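# Hedged sketch (not from the original source): sampling point predictions from a
# trained MDN model such as the one returned by cnn_model_3d_mdn() above.
# `trained_model`, `x_batch` and the dimension arguments are assumptions;
# mdn.sample_from_output is the helper shipped with the keras-mdn-layer package
# referenced in the docstring.
import numpy as np
import mdn

def sample_mdn_predictions(trained_model, x_batch, output_dimension, num_of_mixtures=5):
    # Each row of the prediction holds the flattened GMM parameters:
    # (2 * output_dimension + 1) * num_of_mixtures values.
    mixture_params = trained_model.predict(x_batch)
    samples = np.array([
        mdn.sample_from_output(p, output_dimension, num_of_mixtures).reshape(output_dimension)
        for p in mixture_params
    ])
    return samples  # one sampled output vector per input voxel grid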
def load_model(self, model_name):
    decoder = keras.Sequential()
    decoder.add(keras.layers.LSTM(HIDDEN_UNITS,
                                  batch_input_shape=(1, 1, NUMBER_DIM),
                                  return_sequences=True,
                                  stateful=True))
    decoder.add(keras.layers.LSTM(HIDDEN_UNITS, stateful=True))
    decoder.add(mdn.MDN(NUMBER_DIM, NUMBER_MIXTURES))
    decoder.compile(loss=mdn.get_mixture_loss_func(NUMBER_DIM, NUMBER_MIXTURES),
                    optimizer=keras.optimizers.Adam())
    #decoder.summary()
    decoder.load_weights(model_name)  # load weights independently from file
    print('Model Loaded!')
    return decoder
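# Hedged sketch (not in the original source): one way the stateful decoder returned by
# load_model() above could generate a sequence step by step. NUMBER_DIM and
# NUMBER_MIXTURES mirror the module-level constants assumed by load_model(); the seed
# point and temperature values are illustrative.
import numpy as np
import mdn

def generate_sequence(decoder, steps=100, temp=1.0, sigma_temp=1.0):
    point = np.zeros((1, 1, NUMBER_DIM), dtype=np.float32)  # seed input
    sequence = []
    for _ in range(steps):
        params = decoder.predict(point)[0]
        sample = mdn.sample_from_output(params, NUMBER_DIM, NUMBER_MIXTURES,
                                        temp=temp, sigma_temp=sigma_temp)
        sequence.append(sample.reshape(NUMBER_DIM))
        point = sample.reshape(1, 1, NUMBER_DIM)  # feed the sample back in
    return np.array(sequence)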
def test_save_mdn():
    """Make sure an MDN model can be saved and loaded"""
    N_HIDDEN = 5
    N_MIXES = 5
    model = keras.Sequential()
    model.add(keras.layers.Dense(N_HIDDEN, batch_input_shape=(None, 1), activation='relu'))
    model.add(mdn.MDN(1, N_MIXES))
    model.compile(loss=mdn.get_mixture_loss_func(1, N_MIXES),
                  optimizer=keras.optimizers.Adam())
    model.save('test_save.h5')
    m_2 = keras.models.load_model('test_save.h5',
                                  custom_objects={
                                      'MDN': mdn.MDN,
                                      'mdn_loss_func': mdn.get_mixture_loss_func(1, N_MIXES)
                                  })
    assert isinstance(m_2, keras.engine.sequential.Sequential)
def load_model(model):
    output_dim = 10
    num_mixes = 1
    model = tf.keras.models.load_model(
        dataDir + 'models/%s.h5' % model,
        custom_objects={
            'MDN': mdn.MDN,
            'mdn_loss_func': mdn.get_mixture_loss_func(output_dim, num_mixes)
        })
    return model
def M(seq_len=128, act_len=3, output_dims=32, n_mixes=5, weights=None):
    M = Sequential([
        Input((None, act_len + utils.LATENT_SIZE)),
        LSTM(256, return_sequences=True),
        mdn.MDN(output_dims, n_mixes)
    ])
    M.compile(loss=mdn.get_mixture_loss_func(output_dims, n_mixes),
              optimizer=tf.keras.optimizers.Adam())
    if weights:
        M.load_weights(weights)
    return M
def test_build_mdn():
    N_HIDDEN = 5
    N_MIXES = 5
    model = keras.Sequential()
    model.add(keras.layers.Dense(N_HIDDEN, batch_input_shape=(None, 1), activation='relu'))
    model.add(keras.layers.Dense(N_HIDDEN, activation='relu'))
    model.add(mdn.MDN(1, N_MIXES))
    model.compile(loss=mdn.get_mixture_loss_func(1, N_MIXES),
                  optimizer=keras.optimizers.Adam())
    assert isinstance(model, keras.engine.sequential.Sequential)
def test_number_of_weights():
    """Make sure the number of trainable weights is set up correctly"""
    N_HIDDEN = 5
    N_MIXES = 5
    inputs = keras.layers.Input(shape=(1,))
    x = keras.layers.Dense(N_HIDDEN, activation='relu')(inputs)
    m = mdn.MDN(1, N_MIXES)
    predictions = m(x)
    model = keras.Model(inputs=inputs, outputs=predictions)
    model.compile(loss=mdn.get_mixture_loss_func(1, N_MIXES),
                  optimizer=keras.optimizers.Adam())
    num_mdn_params = np.sum([w.get_shape().num_elements() for w in m.trainable_weights])
    assert (num_mdn_params == 90)
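# Worked check (not part of the original test): where the 90 parameters come from.
# The MDN layer holds three Dense sub-layers over the N_HIDDEN=5 features:
#   means  : 5 inputs * (1*5) units + (1*5) biases = 30
#   sigmas : 5 inputs * (1*5) units + (1*5) biases = 30
#   pis    : 5 inputs *  5    units +  5    biases = 30
N_HIDDEN, N_MIXES, OUTPUT_DIM = 5, 5, 1
expected = 2 * (N_HIDDEN * N_MIXES * OUTPUT_DIM + N_MIXES * OUTPUT_DIM) \
           + (N_HIDDEN * N_MIXES + N_MIXES)
assert expected == 90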
def train_rnn(self, epochs, batch_size, model_name):
    # one dimension for up/down, one dimension for diff
    X_sequence = []
    idx = 0
    while idx < len(self.data) - 1:
        curr = self.data[idx]
        next = self.data[idx + 1]
        X_sequence.append([np.sign(next - curr), np.abs(next - curr)])
        idx += 1

    # prepare training dataset
    X_train = []
    Y_train = []
    for i in range(len(self.data) - self.lags - 1):
        example = X_sequence[i: i + self.lags]
        X_train.append(example[:-1])
        Y_train.append(example[-1])
    print(self.lags - 1, len(X_train[-1]), len(Y_train[-1]))

    X_train = np.array(X_train).reshape(len(Y_train), self.lags - 1, NUMBER_DIM)
    Y_train = np.array(Y_train).reshape(len(Y_train), NUMBER_DIM)
    print(X_train.shape, Y_train.shape)
    # print(np.isnan(np.sum(X_train)))
    # print(np.isnan(np.sum(Y_train)))
    print('batch_size:{}, epoch:{}'.format(batch_size, epochs))

    # Sequential model
    model = keras.Sequential()

    # Add two LSTM layers
    model.add(keras.layers.LSTM(HIDDEN_UNITS,
                                batch_input_shape=(None, self.lags - 1, NUMBER_DIM),
                                return_sequences=True))
    model.add(keras.layers.LSTM(HIDDEN_UNITS))

    # Here's the MDN layer
    model.add(mdn.MDN(NUMBER_DIM, NUMBER_MIXTURES))

    # Now we compile the MDN RNN - need to use a special loss function with the right number of dimensions and mixtures.
    model.compile(loss=mdn.get_mixture_loss_func(NUMBER_DIM, NUMBER_MIXTURES),
                  optimizer=keras.optimizers.Adam())

    # Let's see what we have:
    model.summary()

    history = model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs,
                        callbacks=[keras.callbacks.TerminateOnNaN()])
    model.save('model/{}_rnn_mdn_{}_{}_{}_{}.h5'.format(model_name, NUMBER_DIM,
                                                        NUMBER_MIXTURES, batch_size, epochs))

    plt.figure()
    plt.plot(history.history['loss'])
    plt.title('loss_{}_{}_{}_{}'.format(NUMBER_DIM, NUMBER_MIXTURES, batch_size, epochs))
    plt.show()
def build_up_model():
    keras.backend.clear_session()
    inputs = Input(shape=(400, 3))
    x = LSTM(256, return_sequences=True, batch_input_shape=(None, 400, 3))(inputs)
    x = LSTM(256)(x)
    outputs = mdn.MDN(3, 10)(x)
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss=mdn.get_mixture_loss_func(3, 10),
                  optimizer=keras.optimizers.Adam())

    # Fit the model
    history = model.fit(X, y, batch_size=128, epochs=10, validation_split=0.2)
    model.save_weights('model_weights.h5')
def __init__(self, model_pars=None, data_pars=None, compute_pars=None):
    self.model_pars = copy.deepcopy(model_pars)
    self.fit_metrics = {}

    lstm_h_list = model_pars["lstm_h_list"]
    OUTPUT_DIMS = model_pars["timesteps"]
    N_MIXES = model_pars["n_mixes"]
    dense_neuron = model_pars["dense_neuron"]
    timesteps = model_pars["timesteps"]
    last_lstm_neuron = model_pars["last_lstm_neuron"]
    learning_rate = compute_pars["learning_rate"]
    metrics = compute_pars.get("metrics", ["mae"])

    model = Sequential()
    for ind, hidden in enumerate(lstm_h_list):
        model.add(LSTM(units=hidden,
                       return_sequences=True,
                       name=f"LSTM_{ind+1}",
                       input_shape=(timesteps, 1),
                       recurrent_regularizer=reg.l1_l2(l1=0.01, l2=0.01)))

    model.add(LSTM(units=last_lstm_neuron,
                   return_sequences=False,
                   name=f"LSTM_{len(lstm_h_list) + 1}",
                   input_shape=(timesteps, 1),
                   recurrent_regularizer=reg.l1_l2(l1=0.01, l2=0.01)))
    model.add(Dense(dense_neuron, input_shape=(-1, lstm_h_list[-1]), activation='relu'))
    model.add(mdn.MDN(OUTPUT_DIMS, N_MIXES))

    adam = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(loss=mdn.get_mixture_loss_func(OUTPUT_DIMS, N_MIXES),
                  optimizer=adam)  # metrics=metrics)

    self.model = model
    model.summary()
def _build(self, sequence_length, num_mixtures):
    #TODO: Now, what to do about the fact that episodes may have different lengths?
    #I'll start with just getting this to work for fixed-length sequences, then add dying and variable length after.

    #Building MDN-RNN
    #Testing to build this like the other Keras World Model implementation - because I need to capture hidden states.
    # Seq-to-seq predictions from https://github.com/cpmpercussion/keras-mdn-layer/blob/master/notebooks/MDN-RNN-time-distributed-MDN-training.ipynb

    #### THE MODEL THAT WILL BE TRAINED
    #TODO Do I need to give the seq length here, or is that flexible?
    #inputs = keras.layers.Input(shape=(sequence_length, LATENT_VECTOR_SIZE+ACTION_DIMENSIONALITY), name='inputs')
    inputs = keras.layers.Input(
        shape=(sequence_length, LATENT_VECTOR_SIZE + ACTION_DIMENSIONALITY),
        name='inputs')
    lstm_output = keras.layers.LSTM(NUM_LSTM_UNITS, name='lstm',
                                    return_sequences=True)(inputs)
    #lstm_layer = keras.layers.LSTM(NUM_LSTM_UNITS, name='lstm', return_sequences=True, return_state=True)
    #TODO If I want to use the internal RNN state for agent control, I can plug it in here.
    #lstm_output, _, _ = lstm_layer(inputs)  #This is the trick to not pass the returned_states to the mdn!

    #mdn = Dense(GAUSSIAN_MIXTURES * (3 * Z_DIM))(lstm_output)  # + discrete_dim
    mdn_output = keras.layers.TimeDistributed(
        mdn.MDN(LATENT_VECTOR_SIZE, num_mixtures, name='mdn_outputs'),
        name='td_mdn')(lstm_output)

    rnn = keras.models.Model(inputs=inputs, outputs=mdn_output)

    #### THE MODEL USED DURING PREDICTION
    #TODO Do I really need this forward-model?
    #state_input_h = keras.Input(shape=(NUM_LSTM_UNITS,))
    #state_input_c = keras.Input(shape=(NUM_LSTM_UNITS,))
    #state_inputs = [state_input_h, state_input_c]
    #_, state_h, state_c = lstm_layer(rnn_x, initial_state=[state_input_h, state_input_c])
    #forward = keras.Model([rnn_x] + inputs, [state_h, state_c])

    rnn.summary()

    #default Adam LR is 0.001. Trying half of that.
    #adam = keras.optimizers.Adam(lr=0.0005)
    adam = keras.optimizers.Adam()
    rnn.compile(loss=mdn.get_mixture_loss_func(LATENT_VECTOR_SIZE, num_mixtures),
                optimizer=adam)

    return (rnn, None)
def build(self, layers):
    self.model = Sequential()
    self.model.add(Dense(layers[0], input_shape=(self.n_in,), activation='relu'))
    #self.model.add(Dropout(0.1))
    self.model.add(Dense(layers[1], activation='relu'))
    #self.model.add(Dropout(0.1))
    self.model.add(Dense(layers[2], activation='relu'))
    #self.model.add(Dropout(0.1))
    self.model.add(Dense(layers[3], activation='relu'))
    #self.model.add(Dropout(0.1))
    self.model.add(mdn.MDN(self.n_out, self.n_mixes))
    # compile
    self.model.compile(loss=mdn.get_mixture_loss_func(self.n_out, self.n_mixes),
                       optimizer=Adam(lr=self.lrate))
    print(self.model.summary())
    return self.model
def compile(self, *args, loss=None, **kwargs):
    """compile.

    Use this exactly as you'd use the normal Keras compile, but don't pass your own
    loss function unless you're really deep into this. Let the default MDN mixture
    loss be used.

    Args:
        loss: only if you really want to supply your own loss function

    Returns:
        Nothing
    """
    if loss is None:
        loss = mdn.get_mixture_loss_func(self.output_dim, self.num_mix)
    kwargs['loss'] = loss
    self.model.compile(*args, **kwargs)
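# Hedged usage sketch (the wrapper class name is hypothetical, not from the original
# source): the compile() above forwards everything to Keras but injects the MDN mixture
# loss, so a caller only has to choose an optimizer.
#
#   wrapper = SomeMdnRnnWrapper(output_dim=3, num_mix=10)   # hypothetical constructor
#   wrapper.compile(optimizer=keras.optimizers.Adam())
#   # equivalent to:
#   # wrapper.model.compile(loss=mdn.get_mixture_loss_func(3, 10),
#   #                       optimizer=keras.optimizers.Adam())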
def kSM(self, units=160, n_mixes=5):
    """ Initialize k-Shifted Model """
    self.n_mixes = n_mixes
    self.OUTPUT_DIMS = self.data_processor.target_data.shape[1]

    self.model = Sequential()
    self.model.add(LSTM(units=units, return_sequences=True,
                        input_shape=self.data_processor.input_data.shape[1:]))
    self.model.add(LSTM(units=units, return_sequences=True))
    self.model.add(LSTM(units=units))
    self.model.add(mdn.MDN(self.OUTPUT_DIMS, self.n_mixes))
    self.model.compile(loss=mdn.get_mixture_loss_func(self.OUTPUT_DIMS, self.n_mixes),
                       optimizer='nadam')
    print(self.model.summary())
def fclone_bidirectional_RMDN(seq_length, stack, hidden_size, drop_out, N_MIXES,
                              OUTPUT_DIMS, mark_position):
    """Build an n-stack bidirectional LSTM RMDN model.

    Input
        seq_length : length of each input (EMG) sequence window
        stack : number of stacked bidirectional LSTM blocks
        hidden_size : number of LSTM units per direction in each stacked block
        drop_out : dropout rate applied after each stacked block
        N_MIXES : number of mixture components in the MDN output
        OUTPUT_DIMS : dimensionality of the MDN output
        mark_position : marker positions (only used by the commented-out Dense layer)
    Output
        model : compiled Keras model (bidirectional LSTM stack + MDN output)
    """
    # n-stack RNN model
    K.clear_session()
    model = Sequential()

    # n stack
    for i in range(stack):
        model.add(Bidirectional(LSTM(hidden_size,
                                     input_shape=(seq_length, 4),
                                     return_sequences=True,
                                     kernel_initializer='he_normal')))
        model.add(BatchNormalization())
        # model.add(ELU(alpha=1.0))
        model.add(Dropout(drop_out))

    model.add(Bidirectional(LSTM(256)))
    # model.add(Dense(len(mark_position), kernel_initializer='he_normal'))
    # model.add(ELU(alpha=1.0))
    model.add(mdn.MDN(OUTPUT_DIMS, N_MIXES))

    # configure the training process
    model.compile(loss=mdn.get_mixture_loss_func(OUTPUT_DIMS, N_MIXES),
                  optimizer=keras.optimizers.Adam())

    # inspect the built model
    # model.summary()

    return model
def load(load_pars={}, **kw):
    path = load_pars["outpath"]
    model_pars = kw["model_pars"]
    compute_pars = kw["compute_pars"]
    data_pars = kw["data_pars"]

    custom_pars = {
        "MDN": mdn.MDN,
        "loss": mdn.get_mixture_loss_func(model_pars["timesteps"], model_pars["n_mixes"])
    }
    model0 = load_keras({"path": path + "/armdn.h5"}, custom_pars)

    model = Model(model_pars=model_pars, data_pars=data_pars, compute_pars=compute_pars)
    model.model = model0.model
    session = None
    return model, session
def TDkSM(self, n_mixes=5, units=50, name="default2"):
    """ Initialize Time-Distributed k-Shifted Model """
    self.n_mixes = n_mixes
    self.units = units
    self.OUTPUT_DIMS = self.data_processor.target_data.shape[2]

    self.model = Sequential()
    self.model.add(LSTM(units=self.units, return_sequences=True,
                        input_shape=self.data_processor.input_data.shape[1:]))
    self.model.add(LSTM(units=self.units, return_sequences=True))
    self.model.add(LSTM(units=self.units, return_sequences=True))
    self.model.add(TimeDistributed(mdn.MDN(self.OUTPUT_DIMS, self.n_mixes)))
    self.model.compile(loss=mdn.get_mixture_loss_func(self.OUTPUT_DIMS, self.n_mixes),
                       optimizer='nadam')
    self.model.summary()
def _build_decoder(self, num_mixtures):
    # Decoder for using the trained model
    decoder = keras.Sequential()
    decoder.add(keras.layers.LSTM(
        NUM_LSTM_UNITS,
        batch_input_shape=(1, 1, LATENT_VECTOR_SIZE + ACTION_DIMENSIONALITY),
        return_sequences=False,
        stateful=True,
        name="Input_LSTM"))
    decoder.add(mdn.MDN(LATENT_VECTOR_SIZE, num_mixtures, name="decoder_output_MDN"))
    decoder.compile(loss=mdn.get_mixture_loss_func(LATENT_VECTOR_SIZE, num_mixtures),
                    optimizer=keras.optimizers.Adam())
    decoder.summary()
    #decoder.load_weights(path_to_weights)
    return (decoder, None)
def load_model(self, model_name):
    # with tf.device('/gpu:0'):
    #     print('force using gpu...')
    decoder = tf.compat.v1.keras.Sequential()
    decoder.add(tf.compat.v1.keras.layers.CuDNNLSTM(HIDDEN_UNITS,
                                                    batch_input_shape=(1, 1, NUMBER_DIM),
                                                    return_sequences=True,
                                                    stateful=True))
    decoder.add(tf.compat.v1.keras.layers.CuDNNLSTM(HIDDEN_UNITS, stateful=True))
    decoder.add(mdn.MDN(NUMBER_DIM, NUMBER_MIXTURES))
    decoder.compile(loss=mdn.get_mixture_loss_func(NUMBER_DIM, NUMBER_MIXTURES),
                    optimizer=tf.compat.v1.keras.optimizers.Adam())
    decoder.summary()
    decoder.load_weights(model_name)  # load weights independently from file
    print('Model Loaded!')
    return decoder
def _build_sequential(self, sequence_length, num_mixtures):
    # The RNN-mdn code from https://github.com/cpmpercussion/creative-prediction/blob/master/notebooks/7-MDN-Robojam-touch-generation.ipynb
    model = keras.Sequential()
    model.add(keras.layers.LSTM(
        NUM_LSTM_UNITS,
        input_shape=(sequence_length, LATENT_VECTOR_SIZE + ACTION_DIMENSIONALITY),
        return_sequences=False,
        name="Input_LSTM"))
    # TODO Return sequences returns the hidden state, and feeds that to the next layer. When I do this with the MDN,
    # I get an error, because it does not expect that input. I need to find a way to store the hidden state (for the
    # controller) without return sequences?
    #model.add(keras.layers.LSTM(NUM_LSTM_UNITS))
    model.add(mdn.MDN(LATENT_VECTOR_SIZE, num_mixtures, name="Output_MDN"))
    model.compile(loss=mdn.get_mixture_loss_func(LATENT_VECTOR_SIZE, num_mixtures),
                  optimizer=keras.optimizers.Adam())
    model.summary()
    return (model, None)
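# Hedged sketch (an assumption, not the author's resolution of the TODO above): one way
# to expose the LSTM hidden state for a controller without return_sequences is
# return_state=True in the functional API, so the MDN layer still receives a 2D tensor.
import keras
import mdn

def build_with_exposed_state(sequence_length, input_size, latent_size, num_mixtures,
                             lstm_units=256):
    inputs = keras.layers.Input(shape=(sequence_length, input_size), name='inputs')
    # return_state=True yields the last output plus the final hidden and cell states.
    lstm_out, state_h, state_c = keras.layers.LSTM(lstm_units, return_state=True,
                                                   name='Input_LSTM')(inputs)
    mdn_out = mdn.MDN(latent_size, num_mixtures, name='Output_MDN')(lstm_out)
    # One model trained on the MDN loss, and a second model sharing the same layers
    # that also returns the final states for the controller.
    trainer = keras.models.Model(inputs=inputs, outputs=mdn_out)
    state_model = keras.models.Model(inputs=inputs, outputs=[mdn_out, state_h, state_c])
    trainer.compile(loss=mdn.get_mixture_loss_func(latent_size, num_mixtures),
                    optimizer=keras.optimizers.Adam())
    return trainer, state_model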
def create_mdn_estimator(X, y, X_val, y_val, batch_size=32, nb_epochs=1000, nb_mixtures=1):
    input_shape = X.shape[1:]
    base_network = build_base_network(input_shape)

    input_a = Input(shape=input_shape)
    x = base_network(input_a)
    mixture_layer = mdn.MDN(1, nb_mixtures)(x)

    mdn_model = Model([input_a], [mixture_layer])
    loss = mdn.get_mixture_loss_func(1, nb_mixtures)
    mdn_model.compile(loss=loss, optimizer='adam')

    tb_cb = TensorBoard(log_dir='./mdn_logs/{}'.format(time.time()),
                        histogram_freq=0,
                        batch_size=32,
                        write_graph=True,
                        write_grads=True,
                        write_images=False,
                        embeddings_freq=0,
                        embeddings_layer_names=None,
                        embeddings_metadata=None,
                        embeddings_data=None,
                        update_freq='epoch')

    mdn_model.fit(X, y,
                  batch_size=batch_size,
                  epochs=nb_epochs,
                  validation_data=(X_val, y_val),
                  verbose=1,
                  callbacks=[tb_cb])
    return mdn_model
# Sequential model
model = keras.Sequential()

# Add two LSTM layers, make sure the input shape of the first one is (?, 30, 3)
model.add(keras.layers.LSTM(HIDDEN_UNITS,
                            batch_input_shape=(None, SEQ_LEN, OUTPUT_DIMENSION),
                            return_sequences=True))
model.add(keras.layers.LSTM(HIDDEN_UNITS))
#model.add(keras.layers.Dense(N_HIDDEN, batch_input_shape=(None, 1), activation='relu'))

# Here's the MDN layer, need to specify the output dimension (3) and number of mixtures (10)
model.add(mdn.MDN(OUTPUT_DIMENSION, NUMBER_MIXTURES))

# Now we compile the MDN RNN - need to use a special loss function with the right number of dimensions and mixtures.
model.compile(loss=mdn.get_mixture_loss_func(OUTPUT_DIMENSION, NUMBER_MIXTURES),
              optimizer=keras.optimizers.Adam())

# Let's see what we have:
model.summary()


# In[37]:


# Functions for slicing up data
def slice_sequence_examples(sequence, num_steps):
    xs = []
    for i in range(len(sequence) - num_steps - 1):
        example = sequence[i:i + num_steps]
        xs.append(example)
    return xs
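# Hedged companion sketch (an assumption, not necessarily the original notebook's code):
# turning the windows produced by slice_sequence_examples() above into
# (input sequence, next-step target) pairs for training the MDN-RNN.
def seq_to_singleton_format(examples):
    xs = []
    ys = []
    for ex in examples:
        xs.append(ex[:-1])   # first num_steps-1 points form the input sequence
        ys.append(ex[-1])    # the final point is the prediction target
    return xs, ys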
x = Reshape((1, 128))(inputs)
x = LSTM(512, return_sequences=True, input_shape=(1, 128))(x)
x = Dropout(0.40)(x)
x = LSTM(512, return_sequences=True)(x)
x = Dropout(0.40)(x)
x = LSTM(512)(x)
x = Dropout(0.40)(x)
x = Dense(1000, activation='relu')(x)
outputs = mdn.MDN(outputDim, numComponents)(x)
model = Model(inputs=inputs, outputs=outputs)
print(model.summary())


# In[7]:


opt = adam(lr=0.0005)
model.compile(loss=mdn.get_mixture_loss_func(outputDim, numComponents), optimizer=opt)


# In[8]:


train = False  # change to True to train from scratch

if train:
    X = data[0:len(data) - 1]
    Y = data[1:len(data)]
    checkpoint = ModelCheckpoint(DANCENET_PATH,
                                 monitor='loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='auto')
    callbacks_list = [checkpoint]
y_train = errors

N_MIXES = 1
OUTPUT_DIMS = 2

sgd = keras.optimizers.SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True)
adam = keras.optimizers.Adam()

model = keras.Sequential()
model.add(keras.layers.Dense(8, batch_input_shape=(None, 2), activation='relu'))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.Dense(64, activation='tanh'))
model.add(keras.layers.Dropout(0.1))
model.add(mdn.MDN(OUTPUT_DIMS, N_MIXES))
model.compile(loss=mdn.get_mixture_loss_func(OUTPUT_DIMS, N_MIXES), optimizer=sgd)
model.summary()

es = keras.callbacks.EarlyStopping(monitor='val_loss',
                                   min_delta=0,
                                   patience=10,
                                   verbose=0,
                                   mode='auto',
                                   baseline=None,
                                   restore_best_weights=False)
ckpt = keras.callbacks.ModelCheckpoint("enet.h5",
                                       monitor='val_loss',
                                       verbose=0,
                                       save_best_only=True,
                                       save_weights_only=False,
    model = Model(inputs=[X, a0, c0], outputs=outputs)

    return model


# In[11]:


model = stroke_learn_model(Tx=Tx, n_a=n_a, n_x=n_x)


# In[12]:


opt = Adam(clipvalue=100)
#model.compile(optimizer=opt, loss='mean_squared_error', metrics=['accuracy'])
model.compile(loss=mdn.get_mixture_loss_func(output_dim, n_mix), optimizer=opt)


# In[13]:


a0 = np.zeros((m, n_a))
c0 = np.zeros((m, n_a))
outputs = list(Yoh.swapaxes(0, 1))


# In[14]:


#outputs[0].shape