def predict(self, x_train, y_train, x_test, y_test, embeddings, sequence_length, class_count):
    """Train a stacked forward/backward LSTM classifier and evaluate it.

    The network is: trainable 100-dimensional embedding -> two 3-layer LSTM
    stacks (one reading the sequence forwards, one backwards) whose outputs
    are concatenated -> dropout -> L2-regularised softmax layer.

    Args:
        x_train, y_train: training inputs and targets (``numpy.ndarray``).
        x_test, y_test: held-out inputs and targets (``numpy.ndarray``).
        embeddings: array whose first dimension is used as the vocabulary
            size of the embedding layer. Its values are NOT loaded as
            initial weights.
        sequence_length: length of every input sequence.
        class_count: number of output classes.

    Returns:
        Always ``0``; results are printed and passed to ``test_model``.

    Raises:
        TypeError: if any of the four data arguments is not a numpy array.
    """
    for arg_name, arg_value in (("x_train", x_train), ("x_test", x_test),
                                ("y_train", y_train), ("y_test", y_test)):
        if not isinstance(arg_value, np.ndarray):
            raise TypeError(
                "{0} must be a numpy.ndarray, got {1}".format(arg_name, type(arg_value).__name__))

    from keras.models import Model
    from keras.layers import Input, Dense, Dropout, Embedding, LSTMCell, StackedRNNCells, RNN
    from keras.layers.merge import Concatenate
    from keras.optimizers import Adam
    from keras.regularizers import l2

    input_shape = (sequence_length,)
    model_input = Input(shape=input_shape)
    print("Input tensor shape: ", int_shape(model_input))

    # The embedding is learned from scratch with a fixed width of 100; only
    # the vocabulary size is taken from `embeddings`.
    model_embedding = Embedding(embeddings.shape[0], 100, input_length=sequence_length,
                                name="embedding")(model_input)
    print("Embeddings tensor shape: ", int_shape(model_embedding))

    # Each stack level needs its OWN cell instance. `[LSTMCell(...)] * 3`
    # would repeat a single object three times, so the "three layers" would
    # all be the same layer.
    cells_forward = [LSTMCell(units=self.output_size) for _ in range(3)]
    cells_backward = [LSTMCell(units=self.output_size) for _ in range(3)]
    cells_forward_stacked = StackedRNNCells(cells_forward)
    cells_backward_stacked = StackedRNNCells(cells_backward)

    LSTM_forward = RNN(cells_forward_stacked, go_backwards=False)(model_embedding)
    LSTM_backward = RNN(cells_backward_stacked, go_backwards=True)(model_embedding)
    model_recurrent = Concatenate(axis=-1)([LSTM_forward, LSTM_backward])

    model_hidden = Dropout(0.5)(model_recurrent)
    model_output = Dense(class_count, activation="softmax", kernel_regularizer=l2(0.1),
                         bias_regularizer=l2(0.1))(model_hidden)

    model = Model(model_input, model_output)
    optimizer = Adam(lr=self.learning_rate)
    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

    # Validation uses a 20% split of the training data; the test arrays are
    # only touched by the evaluation below.
    model.fit(x_train, y_train, batch_size=self.batch_size, epochs=self.num_epochs,
              validation_split=0.2, verbose=2, shuffle=True)

    score, acc = model.evaluate(x_test, y_test, batch_size=self.batch_size)
    print('\nTest score:', score)
    print('Test accuracy:', acc, '\n')

    test_model(model, x_test, y_test)
    return 0
def lstm_reshape(self, inputs, name_prefix, index, reshaped_inputs=None, initial=False):
    """Run one single-cell RNN step block and reshape its output.

    Args:
        inputs: when ``initial`` is true, the tensor fed to the RNN;
            otherwise a sequence whose elements from index 1 onwards are
            used as the RNN's initial state.
        name_prefix: middle part of the generated layer names.
        index: suffix appended to the generated layer names.
        reshaped_inputs: tensor fed to the RNN when ``initial`` is false.
        initial: selects which of the two call forms above is used.

    Returns:
        ``(x, rx)`` where ``x`` is the RNN result (output followed by
        states, since ``return_state=True``) and ``rx`` is ``x[0]`` reshaped
        to ``(-1, self.lstm_cell_units)``.
    """
    scoped_prefix = "{0}_{1}_{2}".format(self.controller_network_name, name_prefix, index)

    lstm_cell = LSTMCell(
        self.lstm_cell_units,
        kernel_initializer=get_weight_initializer(initializer="lstm"),
        recurrent_initializer=get_weight_initializer(initializer="lstm"))
    recurrent_layer = RNN(lstm_cell, return_state=True,
                          name="{0}_{1}".format(scoped_prefix, "lstm"))

    if initial:
        x = recurrent_layer(inputs)
    else:
        x = recurrent_layer(reshaped_inputs, initial_state=inputs[1:])

    reshape_layer = Reshape((-1, self.lstm_cell_units),
                            name="{0}_{1}".format(scoped_prefix, "reshape"))
    rx = reshape_layer(x[0])
    return x, rx
def gen_RNN():
    """Build the stateful two-cell LSTM model.

    The input has a fixed batch shape of ``(1, 1, 64 * 64 * 3 + num_classes)``
    (``num_classes`` is read from the enclosing scope). A 32-unit ELU
    projection of the input is concatenated with the raw input, passed
    through a stateful RNN of two 256-unit LSTM cells and two tanh Dense
    layers, and reshaped to ``(1, 3)``.

    Returns:
        The uncompiled Keras ``Model``.
    """
    model_input = Input(batch_shape=(1, 1, 64 * 64 * 3 + num_classes))

    merge_layer = Concatenate()
    projected = Dense(32, activation='elu', name='zoinks')(model_input)
    features = merge_layer([projected, model_input])

    stacked_cells = [
        LSTMCell(256,
                 recurrent_dropout=0.05,
                 implementation=2,
                 recurrent_initializer='glorot_normal')
        for _ in range(2)
    ]
    hidden = RNN(stacked_cells, stateful=True, name="RNN_yoooo")(features)
    hidden = Dense(128, activation='tanh', name='jinkies')(hidden)
    hidden = Dense(3, activation='tanh', name='yikes')(hidden)
    reshaped = Reshape((1, 3))(hidden)

    return Model(inputs=model_input, outputs=reshaped)
def cell():
    """Create a fresh recurrent cell chosen by ``args.cell``.

    Returns a ``VanillaCell`` when ``args.cell`` is ``'vanilla'`` and an
    ``LSTMCell`` for any other value. Both use ``args.hidden_dim`` units.
    """
    if args.cell != 'vanilla':
        return LSTMCell(args.hidden_dim)
    return VanillaCell(args.hidden_dim,
                       use_diag=False,
                       n_rotations=n_rotations,
                       activation=args.activation)
def __init__(self, gpus=1, batch_size=50, segment_size=12, output_size=12, window_size=15,
             cnn_filters=[2,3,4], hidden_sizes=[10,10], learning_rate=0.0001,
             learning_rate_decay=0, create_tensorboard=False):
    """Build and compile the CNN + stacked-LSTM regression model.

    Args:
        gpus: forwarded to ``model_device_adapter.get_device_specific_model``.
        batch_size: forwarded to the parent constructor.
        segment_size: number of time steps per input sample.
        output_size: width of the final linear layer.
        window_size: height and width of each single-channel input frame.
        cnn_filters: filter counts of the three convolutions; exactly the
            first three entries are used. The default list is only read,
            never mutated.
        hidden_sizes: unit counts of the two stacked LSTM cells; exactly
            the first two entries are used. Only read, never mutated.
        learning_rate: Adam learning rate.
        learning_rate_decay: Adam ``decay`` argument.
        create_tensorboard: forwarded to the parent constructor.
    """
    self.segment_size = segment_size
    self.output_size = output_size
    self.gpus = gpus

    # Input sequence of frames; the trailing 1 is the single input channel.
    inputs = Input(shape=(segment_size, window_size, window_size, 1), name="input")

    # Per-frame convolutional feature extractor.
    out = TimeDistributed(Conv2D(cnn_filters[0], kernel_size=5, activation='relu', padding='same'),
                          name="cnn_1")(inputs)
    out = TimeDistributed(MaxPooling2D(), name="max_pool")(out)
    out = TimeDistributed(Conv2D(cnn_filters[1], kernel_size=5, activation='relu', padding='same'),
                          name="cnn_2")(out)
    out = TimeDistributed(AveragePooling2D(), name="avg_pool_1")(out)
    out = TimeDistributed(Conv2D(cnn_filters[2], kernel_size=5, activation='relu', padding='same'),
                          name="cnn_3")(out)
    out = TimeDistributed(AveragePooling2D(), name="avg_pool_2")(out)
    out = TimeDistributed(Flatten(), name="flatten_before_lstm")(out)

    # Two stacked LSTM cells; only the last time step's output is kept.
    cells = [LSTMCell(hidden_sizes[0]), LSTMCell(hidden_sizes[1])]
    out = RNN(cells)(out)

    out = Dense(100, activation='relu', name="mlp_relu")(out)
    out = Dense(output_size, activation='linear', name="mlp_linear")(out)

    self.model = Model(inputs=inputs, outputs=out)
    self.model = model_device_adapter.get_device_specific_model(self.model, gpus)

    optimizer = Adam(lr=learning_rate, decay=learning_rate_decay)
    self.model.compile(loss='mse', optimizer=optimizer)

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line.
    self.model.summary()

    super(CnnLSTM, self).__init__(batch_size=batch_size, create_tensorboard=create_tensorboard)
def cell():
    """Create a fresh recurrent cell chosen by ``args.cell``.

    For ``'vanilla'`` a ``VanillaCell`` is built; its rotation count is
    ``args.n_rotations`` when that is truthy, otherwise
    ``2 * ceil(log2(args.hidden_dim))``. Any other value yields an
    ``LSTMCell`` with ``args.hidden_dim`` units.
    """
    if args.cell != 'vanilla':
        return LSTMCell(args.hidden_dim)

    rotations = args.n_rotations or 2 * ceil(log2(args.hidden_dim))
    return VanillaCell(args.hidden_dim,
                       use_diag=False,
                       n_rotations=rotations,
                       activation=args.activation)
def create_stacked_lstms(self, hidden_size, num_layers, return_sequences, return_state):
    """Return one RNN layer wrapping ``num_layers`` stacked LSTM cells.

    Args:
        hidden_size: number of units in every cell.
        num_layers: how many cells to stack.
        return_sequences: forwarded to the ``RNN`` layer.
        return_state: forwarded to the ``RNN`` layer.
    """
    # One independent cell per level; the RNN layer chains them into a
    # single stacked layer.
    # TODO: try regularizers
    stacked_cells = [LSTMCell(hidden_size) for _ in range(num_layers)]
    return RNN(stacked_cells,
               return_sequences=return_sequences,
               return_state=return_state)
def get_model_soft_sharing_lstm_singleoutput(emb_matrix, time_steps, learning_rate=0.001, n_classes=1,
                                             decay=0.1, layers=3):
    """Build and compile a two-level bidirectional stacked-LSTM classifier.

    Args:
        emb_matrix: embedding matrix; its shape defines the vocabulary size
            and embedding width, and its values initialise the (trainable)
            embedding layer.
        time_steps: length of every integer input sequence.
        learning_rate: Adam learning rate.
        n_classes: number of sigmoid output units.
        decay: Adam learning-rate decay.
        layers: number of LSTM cells stacked in each of the two RNN layers.

    Returns:
        The compiled Keras ``Model`` (binary cross-entropy, accuracy metric).
    """
    cells_1 = [LSTMCell(128, dropout=0.2) for _ in range(layers)]
    cells_2 = [LSTMCell(128, dropout=0.2) for _ in range(layers)]

    token_input = Input(shape=(time_steps, ), dtype='int32')
    embedding = Embedding(input_dim=emb_matrix.shape[0],
                          output_dim=emb_matrix.shape[1],
                          weights=[emb_matrix],
                          input_length=time_steps,
                          trainable=True)
    sequence_input = embedding(token_input)

    x = Bidirectional(RNN(cells_1, return_sequences=True))(sequence_input)
    x = Bidirectional(RNN(cells_2, return_sequences=False))(x)

    # Fully connected head: dropout before every hidden layer.
    for hidden_units in (256, 128, 128, 64, 64):
        x = Dropout(0.2)(x)
        x = Dense(hidden_units, activation='relu')(x)
    preds = Dense(n_classes, activation='sigmoid')(x)

    model = Model(token_input, preds)

    # Use the caller-supplied `decay`; previously an unrelated module-level
    # `DECAY` was read here and the parameter was silently ignored.
    adam = optimizers.Adam(lr=learning_rate, decay=decay)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])

    # summary() prints by itself and returns None.
    model.summary()
    return model
def build_model(input_shape):
    """Build and compile the per-frame CNN + LSTM regression model.

    One input of batch shape ``(BATCH_SIZE,) + input_shape`` is created per
    frame (``FRAMES`` in total). Each frame goes through its own
    Conv2D -> BatchNormalization -> MaxPooling2D(8) block and is flattened;
    the per-frame features are combined by ``Stack`` and fed to an unrolled
    single-cell LSTM followed by a small dense head with one output.

    Returns:
        The compiled model (MSE loss, MAE metric, AMSGrad Adam).
    """
    batch_shape = (BATCH_SIZE, ) + input_shape
    frame_inputs = [Input(batch_shape=batch_shape) for _ in range(FRAMES)]

    conv_depth = 1
    frame_features = []
    for frame in frame_inputs:
        n_filters = 8
        tensor = frame
        for _ in range(conv_depth):
            tensor = Conv2D(n_filters, kernel_size=3, activation='relu', padding='same')(tensor)
            tensor = BatchNormalization()(tensor)
            tensor = MaxPooling2D(8)(tensor)
            # Double the filter count for each additional conv block.
            n_filters *= 2
        frame_features.append(Flatten()(tensor))

    sequence = Stack()(frame_features)

    units = FRAMES
    recurrent = RNN(LSTMCell(units), unroll=True)(sequence)
    head = Dropout(0.2)(recurrent)
    head = Dense(units // 2, activation='relu')(head)
    head = Dropout(0.2)(head)
    output = Dense(1)(head)

    model = km.Model(inputs=frame_inputs, outputs=output)
    model.compile(loss='mse',
                  optimizer=optmzrs.Adam(learning_rate=0.0001, amsgrad=True),
                  metrics=['mae'])
    model.summary()
    return model
def select_cell(cell_type, hidden_dim, l1=0.0, l2=0.0):
    """Return a new RNN cell of the requested kind with ``hidden_dim`` units.

    ``cell_type`` must be ``'vanilla'``, ``'gru'`` or ``'lstm'``. The kernel
    and recurrent weights each get their own ``l1_l2`` regulariser built
    from ``l1`` and ``l2``.

    Raises:
        ValueError: for any other ``cell_type``.
    """
    # Reject unknown names up front so the happy path below is flat.
    if cell_type not in ('vanilla', 'gru', 'lstm'):
        raise ValueError(
            'Unknown cell type. Please select one of: vanilla, gru, or lstm.')

    if cell_type == 'vanilla':
        cell_class = SimpleRNNCell
    elif cell_type == 'gru':
        cell_class = GRUCell
    else:
        cell_class = LSTMCell

    return cell_class(units=hidden_dim,
                      kernel_regularizer=l1_l2(l1=l1, l2=l2),
                      recurrent_regularizer=l1_l2(l1=l1, l2=l2))
def build_model(allow_cudnn_kernel=True):
    """Return an LSTM -> BatchNormalization -> Dense sequential model.

    Args:
        allow_cudnn_kernel: when true the recurrent part is a plain ``LSTM``
            layer; when false it is an ``RNN`` layer wrapping an
            ``LSTMCell``.

    ``units``, ``input_dim`` and ``output_size`` come from the enclosing
    scope.
    """
    # CuDNN is only available at the layer level, not at the cell level:
    # `LSTM(units)` can use the CuDNN kernel, `RNN(LSTMCell(units))` cannot.
    if not allow_cudnn_kernel:
        recurrent_layer = RNN(LSTMCell(units), input_shape=(None, input_dim))
    else:
        recurrent_layer = LSTM(units, input_shape=(None, input_dim))

    stack = [
        recurrent_layer,
        BatchNormalization(),
        Dense(output_size),
    ]
    return keras.models.Sequential(stack)
def call(self, inputs):
    """Run one LSTM step on a packed input list.

    ``inputs`` is ``[message, previous_state, previous_memory]``; the result
    is ``[next_state, next_memory]``.
    """
    message = inputs[0]
    previous = [inputs[1], inputs[2]]
    step_result = LSTMCell.call(self, message, previous)
    # step_result[0] is the step output; step_result[1][1] is the second
    # entry of the returned state list.
    next_state = step_result[0]
    next_memory = step_result[1][1]
    return [next_state, next_memory]
# Build and compile an LSTM-encoder / dense-decoder forecasting model.
# NOTE(review): os.makedirs is called without exist_ok, so this fails if
# config.log_dir already exists — confirm that is intended.
os.makedirs(config.log_dir)
# Seed both TensorFlow and NumPy from the same configured seed.
tf.set_random_seed(config.seed)
np.random.seed(config.seed)
num_steps_ahead = 1
n_layers = 1
# One entry (config.m units) per encoder LSTM layer.
layers = [config.m] * n_layers
# Encoder
# driving series with shape (T, n)
encoder_inputs = Input(shape=(None, config.n))
# add endogenous series
encoder = RNN([LSTMCell(units) for units in layers], return_state=True)
# With return_state=True the call result is used below as a list
# (output followed by the states) and concatenated with [z_i].
encoder_out_and_states = encoder(encoder_inputs)
# Decoder
# Second model input of width config.T - num_steps_ahead.
decoder_inputs = Input(shape=(config.T - num_steps_ahead, ))
z_i = Dense(64)(decoder_inputs)
# Merge the encoder output, the encoder states and the projected decoder input.
encoder_out = concatenate(encoder_out_and_states + [z_i])
z = Dense(256)(encoder_out)
# One linear output per target column.
decoder_outs = Dense(len(config.target_cols), activation="linear")(z)
model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outs)
model.compile(optimizer="adam", loss="mse")
def build(self, input_shape):
    """Build the LSTM weights from the first entry of ``input_shape``.

    ``input_shape`` is indexable; only element 0 is passed on to
    ``LSTMCell.build``.
    """
    first_input_shape = input_shape[0]
    LSTMCell.build(self, first_input_shape)
def __init__(self, output_dim, attn_activation='tanh', single_attention_param=False, **kwargs):
    """Set up the attention options and the wrapped LSTM cell.

    Args:
        output_dim: number of units, used for both the wrapped cell and the
            parent ``LSTMCell`` constructor.
        attn_activation: activation identifier resolved via
            ``activations.get``.
        single_attention_param: stored as-is for later use.
        **kwargs: forwarded to both the wrapped cell and the parent
            constructor.
    """
    # Inner cell first, then the attention settings, then the parent init.
    self.cell = LSTMCell(output_dim, **kwargs)
    self.single_attention_param = single_attention_param
    self.attn_activation = activations.get(attn_activation)
    super(AttentionLSTMCell, self).__init__(output_dim, **kwargs)
gl_next = glimpse_net(loc) loc_mean_arr.append(loc_mean) sampled_loc_arr.append(loc) return gl_next x = Input(shape=input_shape) y = Input(batch_shape=[None]) #x_expanded = K.tile(x, [config.M, 1]) #y_expanded = K.tile(y, [config.M]) init_loc = Lambda(random_location)(x) glimpse = glimpse_net([init_loc, x]) lstm_cell = LSTMCell(config.cell_size) predictions = Dense(config.num_classes, activation='softmax')(x) model = Model(x, predictions) model.compile(loss=keras.losses.categorical_crossentropy, optimizer='Adam', metrics=['accuracy']) model.summary() model.fit(x_train, y_train, batch_size=config.batch_size, epochs=1, verbose=1, validation_data=(x_va, y_va))
class AttentionLSTMCell(LSTMCell):
    """LSTM cell whose hidden output is gated by attention over a constant.

    Each step runs a wrapped ``LSTMCell`` and then scales its hidden output
    ``h`` by a sigmoid gate computed from ``h`` and the first constant
    passed to ``call``.

    NOTE(review): the recurrent weights live in ``self.cell`` and this
    class's own ``LSTMCell`` weights are never built; confirm that the
    Keras version in use tracks the wrapped cell's weights for training.
    """

    def __init__(self, output_dim, attn_activation='tanh', single_attention_param=False, **kwargs):
        """Create the cell.

        Args:
            output_dim: number of LSTM units.
            attn_activation: activation applied to the attention mix.
            single_attention_param: if true, a single scalar gate per sample
                is learned and repeated over all units; otherwise one gate
                per unit.
            **kwargs: forwarded to both the wrapped cell and the parent.
        """
        self.attn_activation = activations.get(attn_activation)
        self.single_attention_param = single_attention_param
        self.cell = LSTMCell(output_dim, **kwargs)
        super(AttentionLSTMCell, self).__init__(output_dim, **kwargs)

    def _recurrent_weight(self, shape, name):
        """Add a weight that uses the recurrent initializer/regularizer/constraint."""
        return self.add_weight(shape=shape,
                               name=name,
                               initializer=self.recurrent_initializer,
                               regularizer=self.recurrent_regularizer,
                               constraint=self.recurrent_constraint)

    def _bias_weight(self, shape, name):
        """Add a weight that uses the bias initializer/regularizer/constraint."""
        return self.add_weight(shape=shape,
                               name=name,
                               initializer=self.bias_initializer,
                               regularizer=self.bias_regularizer,
                               constraint=self.bias_constraint)

    def build(self, input_shape):
        """Build the wrapped cell and create the attention weights.

        ``input_shape[0]`` is the step-input shape handed to the wrapped
        cell; ``input_shape[-1]`` is the shape of the attended constant,
        whose last dimension fixes the attention input width.
        """
        constants_shape = input_shape[-1]
        self.cell.build(input_shape[0])

        attention_dim = constants_shape[-1]
        output_dim = self.units

        # Creation order is kept (U_a, b_a, U_m, b_m, U_s, b_s) because it
        # defines the order used by get_weights()/set_weights().
        self.U_a = self._recurrent_weight((output_dim, output_dim), 'U_a')
        self.b_a = self._bias_weight((output_dim,), 'b_a')

        # This weight used to be registered under the duplicate name 'U_a'.
        self.U_m = self._recurrent_weight((attention_dim, output_dim), 'U_m')
        self.b_m = self._bias_weight((output_dim,), 'b_m')

        if self.single_attention_param:
            self.U_s = self._recurrent_weight((output_dim, 1), 'U_s')
            # dot(m, U_s) has a trailing dimension of 1, so the bias must be
            # a single scalar; the former (output_dim, 1) shape could not be
            # broadcast onto a (batch, 1) score.
            self.b_s = self._bias_weight((1,), 'b_s')
        else:
            self.U_s = self._recurrent_weight((output_dim, output_dim), 'U_s')
            self.b_s = self._bias_weight((output_dim,), 'b_s')

        if self._initial_weights is not None:
            self.set_weights(self._initial_weights)
            del self._initial_weights

    def call(self, x, states, training=None, constants=None):
        """Run one LSTM step and gate its hidden output with attention.

        Args:
            x: step input.
            states: previous states, forwarded to the wrapped cell.
            training: forwarded to the wrapped cell.
            constants: sequence whose first element is the attended tensor.

        Returns:
            ``(h, [h, c])`` with the gated hidden output and the cell's
            second state.
        """
        _, (h, c) = self.cell.call(x, states, training)

        attended = constants[0]
        attention = K.dot(attended, self.U_m) + self.b_m
        m = self.attn_activation(K.dot(h, self.U_a) * attention + self.b_a)

        # A sigmoid gate is used rather than an exponential: the original
        # author reported NaN problems that they attributed to gradients
        # blowing up with the exponential.
        s = K.sigmoid(K.dot(m, self.U_s) + self.b_s)

        if self.single_attention_param:
            # Spread the single gate value over every unit.
            h = h * K.repeat_elements(s, self.units, axis=1)
        else:
            h = h * s
        return h, [h, c]
def buildNetwork(self):
    """Create the placeholders, LSTM cell and output layers of the network.

    Returns a 12-tuple: ``(policy, value, actions, actions_onehot,
    lstm_state, state_in, state_init, state, prev_reward, prev_actions,
    timestep, prev_actions_onehot)``.

    NOTE(review): this function looks unfinished. ``lstm_state`` is never
    assigned here (it only appears in commented-out lines), so the return
    statement raises NameError unless the name exists at module level. See
    the inline notes on the ``RNN`` and ``Dense`` constructions as well.
    """
    # define placeholder variables for input, encoding layers
    state = K.placeholder(shape=[None, 2, self.dataset_size])
    prev_reward = K.placeholder(shape=[None, 1])
    prev_actions = K.placeholder(shape=[None], dtype=tf.int32)
    prev_actions_onehot = K.one_hot(
        prev_actions, self.act_dim)  # ToDo: change to action space dim
    timestep = K.placeholder(shape=[None, 1])
    # Flattened state, previous reward, previous action (one-hot) and the
    # timestep are joined along axis 1.
    hidden = K.concatenate(
        [slim.flatten(state), prev_reward, prev_actions_onehot, timestep], 1)
    # initialize Lstm cells
    # Every constructor argument is read positionally from self.lstm_param_list.
    lstm_cells = LSTMCell(units=self.lstm_param_list[0],
                          activation=self.lstm_param_list[1],
                          recurrent_activation=self.lstm_param_list[2],
                          use_bias=self.lstm_param_list[3],
                          kernel_initializer=self.lstm_param_list[4],
                          recurrent_initializer=self.lstm_param_list[5],
                          bias_initializer=self.lstm_param_list[6],
                          unit_forget_bias=self.lstm_param_list[7],
                          kernel_regularizer=self.lstm_param_list[8],
                          recurrent_regularizer=self.lstm_param_list[9],
                          bias_regularizer=self.lstm_param_list[10],
                          kernel_constraint=self.lstm_param_list[11],
                          recurrent_constraint=self.lstm_param_list[12],
                          bias_constraint=self.lstm_param_list[13],
                          dropout=self.lstm_param_list[14],
                          recurrent_dropout=self.lstm_param_list[15],
                          implementation=self.lstm_param_list[16])
    # initialize cell state
    # Zero-valued initial states plus placeholders of the same shape.
    c_init = np.zeros((1, lstm_cells.units))
    h_init = np.zeros((1, lstm_cells.units))
    state_init = [c_init, h_init]
    c_in = K.placeholder([1, lstm_cells.units])
    h_in = K.placeholder([1, lstm_cells.units])
    state_in = (c_in, h_in)
    lstm_in = K.expand_dims(hidden, [0])
    # step_size and state_tuple are not used anywhere below.
    step_size = K.shape(prev_reward)[:1]
    state_tuple = rnn.LSTMStateTuple(c_in, h_in)
    # NOTE(review): lstm_in is passed as a constructor argument rather than
    # the layer being called on it, and return_state is the string 'true'
    # instead of a boolean — verify against the Keras RNN signature. The
    # resulting lstm_layer is not used below.
    lstm_layer = RNN(lstm_cells, lstm_in, return_state='true')
    #lstm_c, lstm_h = lstm_state
    #state_out = (lstm_c[:1, :], lstm_h[:1, :])
    #lstm_output = tf.reshape(lstm_out, [-1, 48])
    actions = K.placeholder(shape=[None], dtype=tf.int32)
    actions_onehot = K.one_hot(actions, self.act_dim)
    # NOTE(review): both Dense calls pass two positional arguments and are
    # never applied to a tensor, so `policy` and `value` are layer objects.
    # The second positional argument presumably collides with the
    # `activation` keyword — confirm against the Keras Dense signature.
    policy = Dense(256,
                   self.act_dim,
                   activation='softmax',
                   weights=normalized_columns_initializer(0.01))
    value = Dense(256,
                  1,
                  activation=None,
                  weights=normalized_columns_initializer(1.0))
    #inp = Input(self.env_dim)
    # lstm_layer = LSTM(units=self.lstm_param_list[0], activation=self.lstm_param_list[1],
    #                   recurrent_activation=self.lstm_param_list[2], use_bias=self.lstm_param_list[3],
    #                   kernel_initializer=self.lstm_param_list[4], recurrent_initializer=self.lstm_param_list[5],
    #                   bias_initializer=self.lstm_param_list[6], unit_forget_bias=self.lstm_param_list[7],
    #                   kernel_regularizer=self.lstm_param_list[8], recurrent_regularizer=self.lstm_param_list[9],
    #                   bias_regularizer=self.lstm_param_list[10], activity_regularizer=self.lstm_param_list[17],
    #                   kernel_constraint=self.lstm_param_list[11],
    #                   recurrent_constraint=self.lstm_param_list[12], bias_constraint=self.lstm_param_list[13],
    #                   dropout=self.lstm_param_list[14], recurrent_dropout=self.lstm_param_list[15],
    #                   implementation=self.lstm_param_list[16], return_sequences=self.lstm_param_list[18],
    #                   return_state=self.lstm_param_list[19])
    #
    # model = Sequential()
    # model.add(lstm_layer,input_shape = [self.env_dim,])
    # #Model(inp, lstm_layer, Dense(1,))
    #
    # model.compile(optimzer=self.optimizer)
    # Todo: cast outputs into one struct
    return policy, value, actions, actions_onehot, lstm_state, state_in, state_init, state, prev_reward, \
        prev_actions, timestep, prev_actions_onehot
conv_extractor = Sequential() for filter_dim in convs: conv_extractor.add( Conv1D(filter_dim, kernel_size=kernel_size, strides=stride, padding='valid')) encoder_features = conv_extractor(encoder_inputs) encoder_cells = [] for hidden_neurons in layers: encoder_cells.append( LSTMCell(hidden_neurons, kernel_regularizer=regulariser, recurrent_regularizer=regulariser, bias_regularizer=regulariser)) encoder = RNN(encoder_cells, return_sequences=True, return_state=True) encoder_outputs_and_states = encoder(encoder_features) encoder_outs = encoder_outputs_and_states[0] encoder_states = encoder_outputs_and_states[1:] decoder_inputs = Input(shape=(None, input_dim)) decoder_features = conv_extractor(decoder_inputs) decoder_cells = [] for hidden_neurons in layers: decoder_cells.append( LSTMCell(hidden_neurons, kernel_regularizer=regulariser, recurrent_regularizer=regulariser,
LSTM(20, return_sequences=True, input_shape=(None, 1)), LSTM(20), Dense(1), ]) model.compile('adam', 'mse') model.fit(x_train, y_train, epochs=500) print(model.predict(x_train)) #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ # not tested # general-purpose `RNN` layer from keras.layers import RNN, LSTMCell model = Sequential([ RNN(LSTMCell(20), return_sequences=True, input_shape=(None, 1)), RNN(LSTMCell(20), return_sequences=True), TimeDistributed(Dense(10)) ]) # same as: model = Sequential([ LSTM(20, return_sequences=True, input_shape=(None, 1)), LSTM(20, return_sequences=True), TimeDistributed(Dense(10)) ]) # use `LSTM` since it's optimized on gpu # use `RNN` when you define custom cells #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ # not tested # https://www.tensorflow.org/api_docs/python/tf/keras/layers/Embedding # https://medium.com/analytics-vidhya/understanding-embedding-layer-in-keras-bbe3ff1327ce
def SharmaNet(input_shape, train_all_baseline=False, weight_decay=1e-5, train_mode="default",
              audio_dim=0, yam_dim=0):
    """Build the SE-ResNet50 + attention-LSTM video model, optionally fused with audio.

    Args:
        input_shape: shape of the frame input; ``input_shape[1:4]`` is the
            per-frame shape given to the SE-ResNet50 backbone and
            ``input_shape[0]`` the number of frames.
        train_all_baseline: value assigned to ``model.layers[1].trainable``
            (presumably the time-distributed backbone — confirm).
        weight_decay: L2 factor shared by all six regularisers.
        train_mode: ``"default"`` for video only. A value containing
            ``"early"`` adds an audio ``Input`` of shape
            ``(input_shape[0], audio_dim)``; otherwise a value containing
            ``"joint"`` adds a YAMNet branch.
        audio_dim: audio feature width for the "early" mode.
        yam_dim: first dimension of the YAMNet input for the "joint" mode.

    Returns:
        The uncompiled Keras ``Model``; its output is the mean over time of
        the per-frame 7-way softmax.

    Raises:
        ValueError: if ``train_mode`` is neither ``"default"`` nor contains
            ``"early"`` or ``"joint"``. Such a value used to fail later
            with a NameError on ``audio_input``.
    """
    uses_audio = train_mode != "default"
    if uses_audio and "early" not in train_mode and "joint" not in train_mode:
        raise ValueError(
            "Unsupported train_mode {0!r}: expected 'default' or a mode containing "
            "'early' or 'joint'.".format(train_mode))

    frame_input = Input(input_shape)

    # Six independent regulariser objects with the same strength.
    regs = [regularizers.l2(weight_decay) for _ in range(6)]
    cells = [
        LSTMCell(1024,
                 kernel_regularizer=regs[0],
                 recurrent_regularizer=regs[1])
    ]

    # Per-frame feature extractor: SE-ResNet50 cut at its fourth-from-last layer.
    seres50 = SEResNet50(input_shape=(input_shape[1], input_shape[2], input_shape[3]),
                         classes=7)
    backbone = Model(seres50.input, seres50.layers[-4].output)
    x = TimeDistributed(backbone, input_shape=input_shape)(frame_input)

    # Merge the two spatial axes so each frame becomes a set of h*w vectors.
    t, h, w, c = [int(dim) for dim in x.shape[1:]]
    reshape_dim = (t, h * w, c)
    x = Reshape(reshape_dim)(x)

    # Feature mean over locations, then over time, used to initialise the LSTM.
    features_mean_layer = Lambda(lambda y: tf.reduce_mean(y, axis=2))(x)
    features_mean_layer = Lambda(lambda y: tf.reduce_mean(y, axis=1))(features_mean_layer)
    dense_h0 = Dense(1024, activation='tanh', kernel_regularizer=regs[2])(features_mean_layer)
    dense_c0 = Dense(1024, activation='tanh', kernel_regularizer=regs[3])(features_mean_layer)

    rnn_attention = RNNStackedAttention(reshape_dim, cells, return_sequences=True, unroll=True)
    x = rnn_attention(x, initial_state=[dense_h0, dense_c0])

    if uses_audio:
        if "early" in train_mode:
            audio_input = Input(shape=(input_shape[0], audio_dim))
        else:
            yn = YAMNet(weights='keras_yamnet/yamnet_conv.h5',
                        classes=7,
                        classifier_activation='softmax',
                        input_shape=(yam_dim, 64))
            # Positional (inputs, outputs) form, like the backbone above;
            # the legacy `input=`/`output=` keywords are deprecated.
            yamnet = Model(yn.input, yn.layers[-3].output)
            # NOTE(review): this is an intermediate tensor, yet it is listed
            # as a model input below — confirm whether yn.input was meant.
            audio_input = yamnet.output
        x = Concatenate(name='fusion1')([x, audio_input])
        input_tensors = [audio_input, frame_input]
    else:
        input_tensors = frame_input

    # Per-frame classifier head.
    x = TimeDistributed(
        Dense(100, activation='tanh', kernel_regularizer=regs[4], name='ff_logit_lstm'))(x)
    x = TimeDistributed(Dropout(0.5))(x)
    x = TimeDistributed(
        Dense(7, activation='softmax', kernel_regularizer=regs[5], name='ff_logit'))(x)
    # Average the per-frame class probabilities over time.
    x = Lambda(lambda y: tf.reduce_mean(y, axis=1))(x)

    model = Model(input_tensors, x)
    model.layers[1].trainable = train_all_baseline
    return model
# Training setup: optimizer, input tensor and the recurrent part of the model.
# output_activation and tensorboard are assigned here but not used in the
# lines visible in this excerpt.
output_activation = 'linear'
model_type = args.model
initial_lr = args.lr
# Decay derived from the total number of optimisation steps.
decay = initial_lr / (epochs * steps_per_epoch)
optimizer = optimizers.RMSprop(lr=initial_lr, decay=decay)
tensorboard = args.tensorboard
# NOTE(review): recurrent_clip is only assigned here when args.noclip is
# set; the non-LSTM branch below reads it unconditionally, so it must
# already be defined before this excerpt — confirm.
if args.noclip:
    recurrent_clip = -1
# NOTE(review): `optimizer` above was already built with the earlier decay
# value, so this reset does not reach it.
if args.nodecay:
    decay = 0
input_tensor = Input((T, 2))
if model_type == 'lstm':
    rnn_out = RNN(LSTMCell(H))(input_tensor)
    # NOTE(review): the Adam instance is created and discarded; presumably
    # `optimizer = ...` was intended — confirm.
    optimizers.Adam(lr=initial_lr, decay=decay, amsgrad=True)
else:
    # Two stacked VanillaCell RNNs; the first returns the full sequence so
    # the second can consume it.
    cls = VanillaCell
    rnn_1 = RNN(cls(H,
                    n_rotations=K,
                    activation=recurrent_activation,
                    recurrent_clip=recurrent_clip,
                    use_diag=not args.nodiag),
                return_sequences=True)
    rnn_2 = RNN(cls(H,
                    n_rotations=K,
                    activation=recurrent_activation,
                    recurrent_clip=recurrent_clip,
                    use_diag=not args.nodiag))
    rnn_out = rnn_2(rnn_1(input_tensor))
# Output layer object; it is not applied to rnn_out within this excerpt.
dense = Dense(1, activation='linear')
def __init__(self, is_training, batch_size, scaler, **model_kwargs):
    """Build the LSTM encoder/decoder graph.

    Args:
        is_training: when true the decoder is fed ground-truth labels
            (or a scheduled mix, see ``use_curriculum_learning``); when
            false it is fed its own previous prediction.
        batch_size: fixed batch size of the placeholders.
        scaler: stored on the instance as ``_scaler``.
        **model_kwargs: configuration. ``rnn_units`` and ``seq_len`` are
            required; ``cl_decay_steps`` (1000), ``horizon`` (1),
            ``max_grad_norm`` (5.0), ``n_rnn_layers`` (1),
            ``use_curriculum_learning`` (False), ``input_dim`` (1) and
            ``output_dim`` (1) are optional with the defaults shown.

    NOTE(review): the decoder outputs are reshaped straight to
    ``(batch_size, horizon, output_dim)`` with no projection layer, which
    presumably only works when ``rnn_units == output_dim`` — confirm.
    """
    self._scaler = scaler

    # Train and loss
    self._loss = None
    self._mse = None
    self._train_op = None

    cl_decay_steps = int(model_kwargs.get('cl_decay_steps', 1000))
    horizon = int(model_kwargs.get('horizon', 1))
    # Parsed but not used anywhere in this constructor.
    max_grad_norm = float(model_kwargs.get('max_grad_norm', 5.0))
    n_rnn_layers = int(model_kwargs.get('n_rnn_layers', 1))
    rnn_units = int(model_kwargs.get('rnn_units'))
    seq_len = int(model_kwargs.get('seq_len'))
    use_curriculum_learning = bool(model_kwargs.get('use_curriculum_learning', False))
    input_dim = int(model_kwargs.get('input_dim', 1))
    output_dim = int(model_kwargs.get('output_dim', 1))

    # Input: (batch_size, seq_len, input_dim)
    self._inputs = tf.placeholder(tf.float32,
                                  shape=(batch_size, seq_len, input_dim),
                                  name='inputs')
    # Labels: (batch_size, horizon, 1)
    self._labels = tf.placeholder(tf.float32,
                                  shape=(batch_size, horizon, 1),
                                  name='labels')

    # First decoder input: an all-zero "go" vector.
    GO_SYMBOL = tf.zeros(shape=(batch_size, output_dim))

    # Every layer gets its own cell object. Repeating one instance with
    # `[cell] * n` would reuse the same cell for all layers, and the
    # encoder and decoder would share it too.
    encoder_layers = [LSTMCell(units=rnn_units) for _ in range(n_rnn_layers)]
    cell_with_projection = LSTMCell(units=rnn_units)
    decoder_layers = [LSTMCell(units=rnn_units) for _ in range(n_rnn_layers - 1)]
    decoder_layers.append(cell_with_projection)

    encoding_cells = tf.contrib.rnn.MultiRNNCell(encoder_layers, state_is_tuple=True)
    decoding_cells = tf.contrib.rnn.MultiRNNCell(decoder_layers, state_is_tuple=True)

    global_step = tf.train.get_or_create_global_step()

    with tf.variable_scope('LSTM_SEQ'):
        # Split both tensors into per-time-step lists.
        inputs = tf.unstack(tf.reshape(self._inputs, (batch_size, seq_len, input_dim)), axis=1)
        labels = tf.unstack(
            tf.reshape(self._labels[..., :output_dim], (batch_size, horizon, output_dim)),
            axis=1)
        labels.insert(0, GO_SYMBOL)

        def _loop_function(prev, i):
            """Choose the decoder input for step ``i``."""
            if not is_training:
                # Return the prediction of the model in testing.
                return prev
            if not use_curriculum_learning:
                return labels[i]
            # Return either the model's prediction or the previous ground
            # truth in training, sampled against the current threshold.
            c = tf.random_uniform((), minval=0, maxval=1.)
            threshold = self._compute_sampling_threshold(global_step, cl_decay_steps)
            return tf.cond(tf.less(c, threshold), lambda: labels[i], lambda: prev)

        _, enc_state = tf.contrib.rnn.static_rnn(encoding_cells, inputs, dtype=tf.float32)
        outputs, final_state = legacy_seq2seq.rnn_decoder(labels,
                                                          enc_state,
                                                          decoding_cells,
                                                          loop_function=_loop_function)

    # Drop the last decoder step (the labels list is one longer because of
    # the go symbol) and stack the rest along the time axis.
    outputs = tf.stack(outputs[:-1], axis=1)
    self._outputs = tf.reshape(outputs, (batch_size, horizon, output_dim), name='outputs')
    self._merged = tf.summary.merge_all()
from keras.callbacks import ModelCheckpoint, EarlyStopping from keras.constraints import non_neg model = Sequential() model.add( Conv2D(filters=1, kernel_size=(timeperiod_window_size, num_features), strides=(timeperiod_window_size, num_features), padding='valid', data_format='channels_first', activation='tanh', use_bias=False, kernel_constraint=non_neg(), input_shape=(1, num_steps * timeperiod_window_size, num_features))) model.add(Reshape((num_steps, 1))) cells = [LSTMCell(lstm_units), LSTMCell(lstm_units)] model.add(RNN(cells, return_sequences=True)) model.add(Dense(1)) model.add(Flatten()) rmsprop = optimizers.RMSprop(lr=learning_rate) model.summary() model.compile(optimizer=rmsprop, loss='mean_squared_error') #Running the computational graph checkpointer = ModelCheckpoint(filepath=r'.\factor_analysis_weights.hdf5', verbose=1, save_best_only=True) earlystopping = EarlyStopping(monitor='val_loss', patience=100) model.fit(trainning_features, trainning_targets, batch_size=5,
implementation = 1 # optional parameters in LSTM: set to default values according documentation (https://keras.io/layers/recurrent/#rnn) # go_backwards=False # stateful=False # unroll=False # Todo: Why are the params in cell and model?? lstmCells = LSTMCell(numUnits, activation=activation, recurrent_activation=recurrent_activation, use_bias=use_bias, kernel_initializer=kernel_initializer, recurrent_initializer=recurrent_initializer, bias_initializer=bias_initializer, unit_forget_bias=unit_forget_bias, kernel_regularizer=kernel_regularizer, recurrent_regularizer=recurrent_regularizer, bias_regularizer=bias_regularizer, kernel_constraint=kernel_constraint, recurrent_constraint=recurrent_constraint, bias_constraint=bias_constraint, dropout=dropout, recurrent_dropout=recurrent_dropout, implementation=implementation) lstmNetwork = LSTM(lstmCells, activation=activation, recurrent_activation=recurrent_activation, use_bias=use_bias, kernel_initializer=kernel_initializer, recurrent_initializer=recurrent_initializer,