def LSTM_model_memory_batch(self, batch_size, epoch):
    # Stateful LSTM: the batch size is baked into batch_input_shape, so the same
    # batch size must be used for fitting and prediction.
    model = Sequential()
    model.add(LSTM(4, batch_input_shape=(batch_size, self.look_back, 1), stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    # Run the epochs manually so the internal states can be reset between epochs.
    for i in range(epoch):
        model.fit(self.trainX, self.trainY, epochs=1, batch_size=batch_size,
                  verbose=2, shuffle=False)
        model.reset_states()
    return model
def fit_lstm(train, batch_size, nb_epoch, neurons):
    X, y = train[:, 0:-1], train[:, -1]
    X = X.reshape(X.shape[0], 1, X.shape[1])
    model = Sequential()
    model.add(LSTM(neurons, batch_input_shape=(batch_size, X.shape[1], X.shape[2]),
                   stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    for i in range(nb_epoch):
        model.fit(X, y, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
        model.reset_states()
    return model
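# A minimal, hedged usage sketch for the fit_lstm helper above. The variable
# names (supervised_values, etc.) are illustrative assumptions, not from the
# original code; the only point shown is that a stateful model must be fed
# batches of the size it was built with.
import numpy as np

supervised_values = np.random.rand(100, 2)   # columns: [lag feature, target]
model = fit_lstm(supervised_values, batch_size=1, nb_epoch=10, neurons=4)

# One-step forecast with the same fixed batch size.
X = supervised_values[-1, 0:-1].reshape(1, 1, 1)
yhat = model.predict(X, batch_size=1)[0, 0]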
def _runner(layer_class): """ All the recurrent layers share the same interface, so we can run through them with a single function. """ for ret_seq in [True, False]: layer = layer_class(output_dim, return_sequences=ret_seq, weights=None, input_shape=(timesteps, input_dim)) layer.input = K.variable(np.ones((nb_samples, timesteps, input_dim))) layer.get_config() for train in [True, False]: out = K.eval(layer.get_output(train)) # Make sure the output has the desired shape if ret_seq: assert(out.shape == (nb_samples, timesteps, output_dim)) else: assert(out.shape == (nb_samples, output_dim)) mask = layer.get_output_mask(train) # check statefulness layer = layer_class(output_dim, return_sequences=False, stateful=True, weights=None, batch_input_shape=(nb_samples, timesteps, input_dim)) model = Sequential() model.add(layer) model.compile(optimizer='sgd', loss='mse') out1 = model.predict(np.ones((nb_samples, timesteps, input_dim))) assert(out1.shape == (nb_samples, output_dim)) # train once so that the states change model.train_on_batch(np.ones((nb_samples, timesteps, input_dim)), np.ones((nb_samples, output_dim))) out2 = model.predict(np.ones((nb_samples, timesteps, input_dim))) # if the state is not reset, output should be different assert(out1.max() != out2.max()) # check that output changes after states are reset # (even though the model itself didn't change) layer.reset_states() out3 = model.predict(np.ones((nb_samples, timesteps, input_dim))) assert(out2.max() != out3.max()) # check that container-level reset_states() works model.reset_states() out4 = model.predict(np.ones((nb_samples, timesteps, input_dim))) assert_allclose(out3, out4, atol=1e-5) # check that the call to `predict` updated the states out5 = model.predict(np.ones((nb_samples, timesteps, input_dim))) assert(out4.max() != out5.max())
def test_statefulness(layer_class):
    model = Sequential()
    model.add(embeddings.Embedding(embedding_num, embedding_dim,
                                   mask_zero=True,
                                   input_length=timesteps,
                                   batch_input_shape=(nb_samples, timesteps)))
    layer = layer_class(output_dim, return_sequences=False,
                        stateful=True, weights=None)
    model.add(layer)
    model.compile(optimizer='sgd', loss='mse')
    out1 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out1.shape == (nb_samples, output_dim))

    # train once so that the states change
    model.train_on_batch(np.ones((nb_samples, timesteps)),
                         np.ones((nb_samples, output_dim)))
    out2 = model.predict(np.ones((nb_samples, timesteps)))

    # if the state is not reset, output should be different
    assert(out1.max() != out2.max())

    # check that output changes after states are reset
    # (even though the model itself didn't change)
    layer.reset_states()
    out3 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out2.max() != out3.max())

    # check that container-level reset_states() works
    model.reset_states()
    out4 = model.predict(np.ones((nb_samples, timesteps)))
    assert_allclose(out3, out4, atol=1e-5)

    # check that the call to `predict` updated the states
    out5 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out4.max() != out5.max())

    # Check masking
    layer.reset_states()
    left_padded_input = np.ones((nb_samples, timesteps))
    left_padded_input[0, :1] = 0
    left_padded_input[1, :2] = 0
    out6 = model.predict(left_padded_input)

    layer.reset_states()
    right_padded_input = np.ones((nb_samples, timesteps))
    right_padded_input[0, -1:] = 0
    right_padded_input[1, -2:] = 0
    out7 = model.predict(right_padded_input)

    assert_allclose(out7, out6, atol=1e-5)
def hyper_build_model(self, space, predict, custom_batch_size=None):
    conf = self.conf
    model_conf = conf['model']
    rnn_size = model_conf['rnn_size']
    rnn_type = model_conf['rnn_type']
    regularization = model_conf['regularization']
    dropout_prob = model_conf['dropout_prob']
    length = model_conf['length']
    pred_length = model_conf['pred_length']
    skip = model_conf['skip']
    stateful = model_conf['stateful']
    return_sequences = model_conf['return_sequences']
    # model_conf['output_activation']
    output_activation = conf['data']['target'].activation
    num_signals = conf['data']['num_signals']

    batch_size = self.conf['training']['batch_size']
    if predict:
        batch_size = self.conf['model']['pred_batch_size']
        # so we can predict with one time point at a time!
        if return_sequences:
            length = pred_length
        else:
            length = 1
    if custom_batch_size is not None:
        batch_size = custom_batch_size

    if rnn_type == 'LSTM':
        rnn_model = LSTM
    elif rnn_type == 'SimpleRNN':
        rnn_model = SimpleRNN
    else:
        print('Unknown Model Type, exiting.')
        exit(1)

    batch_input_shape = (batch_size, length, num_signals)
    model = Sequential()
    for _ in range(model_conf['rnn_layers']):
        model.add(rnn_model(rnn_size,
                            return_sequences=return_sequences,
                            batch_input_shape=batch_input_shape,
                            stateful=stateful,
                            kernel_regularizer=l2(regularization),
                            recurrent_regularizer=l2(regularization),
                            bias_regularizer=l2(regularization),
                            dropout=dropout_prob,
                            recurrent_dropout=dropout_prob))
        model.add(Dropout(space['Dropout']))
    if return_sequences:
        model.add(TimeDistributed(Dense(1, activation=output_activation)))
    else:
        model.add(Dense(1, activation=output_activation))
    model.reset_states()
    return model
def build_model(predict, batch_size, length, featurelen):
    if predict:
        batch_size = length = 1
    model = Sequential()
    model.add(LSTM(10, return_sequences=True,
                   batch_input_shape=(batch_size, length, featurelen),
                   stateful=True))
    model.add(Dropout(0.2))
    model.add(LSTM(10, return_sequences=True, stateful=True))
    model.add(Dropout(0.2))
    model.add(TimeDistributed(Dense(featurelen)))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    model.reset_states()
    return model
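# The predict flag above rebuilds the network with batch size and sequence
# length of 1. A common companion step, sketched here under assumed names
# (train_model, featurelen, etc. are illustrative, not from the original code),
# is to copy the trained weights into the prediction-shaped model and reset
# its states before sampling one step at a time.
train_model = build_model(predict=False, batch_size=32, length=50, featurelen=featurelen)
# ... fit train_model here ...

predict_model = build_model(predict=True, batch_size=32, length=50, featurelen=featurelen)
predict_model.set_weights(train_model.get_weights())
predict_model.reset_states()  # start from a clean state before stepping one timestep at a time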
def fit_lstm(train, n_lag, n_seq, n_batch, nb_epoch, n_neurons):
    # reshape training into [samples, timesteps, features]
    X, y = train[:, 0:n_lag], train[:, n_lag:]
    X = X.reshape(X.shape[0], 1, X.shape[1])
    # design network
    model = Sequential()
    model.add(LSTM(n_neurons, batch_input_shape=(n_batch, X.shape[1], X.shape[2]),
                   stateful=True))
    model.add(Dense(y.shape[1]))
    model.compile(loss='mean_squared_error', optimizer='adam')
    # fit network
    for i in range(nb_epoch):
        model.fit(X, y, epochs=1, batch_size=n_batch, verbose=0, shuffle=False)
        model.reset_states()
    return model
def fit_lstm(train, n_batch, nb_epoch, n_neurons):
    X, y = train[:, 0:-1], train[:, -1]
    X = X.reshape(X.shape[0], 1, X.shape[1])
    print('x=', X)
    model = Sequential()
    # https://keras.io/layers/recurrent/#lstm
    # The stateful LSTM layer is kept in the model: without it the network has no
    # recurrent state, the 3-D input cannot be mapped to the 1-D targets, and
    # reset_states() is a no-op.
    model.add(LSTM(n_neurons, batch_input_shape=(n_batch, X.shape[1], X.shape[2]),
                   stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    for i in range(nb_epoch):
        model.fit(X, y, epochs=1, batch_size=n_batch, verbose=0, shuffle=False)
        model.reset_states()
    return model
def run(s):
    word_to_num, num_to_word, word_cnt = build_dict(s)
    X, y = build_xy(s, word_to_num, num_to_word, word_cnt)
    print(X.shape, y.shape)

    dim = len(word_to_num)
    print("Number of words: %d" % dim)
    print("Number of sentences: %d" % len(X))

    model = Sequential()
    model.add(LSTM(output_dim=50, input_dim=dim,
                   activation='sigmoid', inner_activation='hard_sigmoid'))
    # model.add(Dropout(0.5))
    model.add(Dense(200))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop',
                  metrics=['accuracy'])
    # model.summary()
    model.fit(X, y, batch_size=200, nb_epoch=10)

    N = 200  # num of sents to be generated
    for i in range(N):
        model.reset_states()
        k = start_token
        res = [k]
        while k != end_token:
            k_oh = num_to_onehot(k, dim)
            kk_oh = model.predict(k_oh)
            kk = onehot_to_num(kk_oh, dim)
            k = kk
            res.append(k)
        s = ""
        for c in res:
            s = s + num_to_word(c)
        print(s)
class LSTM_RNN:

    def __init__(self, look_back, dropout_probability=0.2, init='he_uniform',
                 loss='mse', optimizer='rmsprop'):
        self.rnn = Sequential()
        self.look_back = look_back
        self.rnn.add(LSTM(10, stateful=True, batch_input_shape=(1, 1, 1), init=init))
        self.rnn.add(Dropout(dropout_probability))
        self.rnn.add(Dense(1, init=init))
        self.rnn.compile(loss=loss, optimizer=optimizer)

    def batch_train_test(self, trainX, trainY, testX, testY, nb_epoch=150):
        print('Training LSTM-RNN...')
        for epoch in range(nb_epoch):
            print('Epoch ' + str(epoch + 1) + '/{}'.format(nb_epoch))
            training_losses = []
            testing_losses = []
            for i in range(len(trainX)):
                y_actual = trainY[i]
                for j in range(self.look_back):
                    training_loss = self.rnn.train_on_batch(
                        np.expand_dims(np.expand_dims(trainX[i][j], axis=1), axis=1),
                        np.array([y_actual]))
                    training_losses.append(training_loss)
                self.rnn.reset_states()
            print('Mean training loss = {}'.format(np.mean(training_losses)))

            mean_testing_loss = []
            for i in range(len(testX)):
                for j in range(self.look_back):
                    testing_loss = self.rnn.test_on_batch(
                        np.expand_dims(np.expand_dims(testX[i][j], axis=1), axis=1),
                        np.array([testY[i]]))
                    testing_losses.append(testing_loss)
                self.rnn.reset_states()
                for j in range(self.look_back):
                    y_pred = self.rnn.predict_on_batch(
                        np.expand_dims(np.expand_dims(testX[i][j], axis=1), axis=1))
                self.rnn.reset_states()
            mean_testing_loss = np.mean(testing_losses)
            print('Mean testing loss = {}'.format(mean_testing_loss))
        return mean_testing_loss
# (tail of a model.compile(...) call; the preceding lines are not part of this excerpt)
              optimizer='adam', metrics=['accuracy'])

for i in range(epochs):
    model.fit(aprender_x, aprender_y,
              epochs=1,
              batch_size=ronda,
              # callbacks=callbacks_list,
              validation_data=(evaluar_x, evaluar_y),
              verbose=verbose,
              shuffle=False)
    model.reset_states()

# re-define the batch size
ronda = 1

# build the network with LSTM layers
new_model = Sequential()
new_model.add(LSTM(32, batch_input_shape=(ronda, 1, lookback),
                   return_sequences=True, stateful=True))
new_model.add(LSTM(16, input_shape=(1, lookback)))
new_model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
# copy weights
def test_convolutional_recurrent_statefulness(): data_format = 'channels_last' return_sequences = False inputs = np.random.rand(num_samples, sequence_len, input_num_row, input_num_col, input_channel) # Tests for statefulness model = Sequential() kwargs = {'data_format': data_format, 'return_sequences': return_sequences, 'filters': filters, 'kernel_size': (num_row, num_col), 'stateful': True, 'batch_input_shape': inputs.shape, 'padding': 'same'} layer = convolutional_recurrent.ConvLSTM2D(**kwargs) model.add(layer) model.compile(optimizer='sgd', loss='mse') out1 = model.predict(np.ones_like(inputs)) # train once so that the states change model.train_on_batch(np.ones_like(inputs), np.random.random(out1.shape)) out2 = model.predict(np.ones_like(inputs)) # if the state is not reset, output should be different assert(out1.max() != out2.max()) # check that output changes after states are reset # (even though the model itself didn't change) layer.reset_states() out3 = model.predict(np.ones_like(inputs)) assert(out2.max() != out3.max()) # check that container-level reset_states() works model.reset_states() out4 = model.predict(np.ones_like(inputs)) assert_allclose(out3, out4, atol=1e-5) # check that the call to `predict` updated the states out5 = model.predict(np.ones_like(inputs)) assert(out4.max() != out5.max()) # cntk doesn't support eval convolution with static # variable, will enable it later if K.backend() != 'cntk': # check regularizers kwargs = {'data_format': data_format, 'return_sequences': return_sequences, 'kernel_size': (num_row, num_col), 'stateful': True, 'filters': filters, 'batch_input_shape': inputs.shape, 'kernel_regularizer': regularizers.L1L2(l1=0.01), 'recurrent_regularizer': regularizers.L1L2(l1=0.01), 'bias_regularizer': 'l2', 'activity_regularizer': 'l2', 'kernel_constraint': 'max_norm', 'recurrent_constraint': 'max_norm', 'bias_constraint': 'max_norm', 'padding': 'same'} layer = convolutional_recurrent.ConvLSTM2D(**kwargs) layer.build(inputs.shape) assert len(layer.losses) == 3 assert layer.activity_regularizer output = layer(K.variable(np.ones(inputs.shape))) assert len(layer.losses) == 4 K.eval(output) # check dropout layer_test(convolutional_recurrent.ConvLSTM2D, kwargs={'data_format': data_format, 'return_sequences': return_sequences, 'filters': filters, 'kernel_size': (num_row, num_col), 'padding': 'same', 'dropout': 0.1, 'recurrent_dropout': 0.1}, input_shape=inputs.shape) # check state initialization layer = convolutional_recurrent.ConvLSTM2D( filters=filters, kernel_size=(num_row, num_col), data_format=data_format, return_sequences=return_sequences) layer.build(inputs.shape) x = Input(batch_shape=inputs.shape) initial_state = layer.get_initial_state(x) y = layer(x, initial_state=initial_state) model = Model(x, y) assert (model.predict(inputs).shape == layer.compute_output_shape(inputs.shape))
class Learner(object):

    def __init__(self, options):
        self.options = options
        n = options.n
        l = int(options.s)
        self.n = n                      # number of dimensions in feature vectors
        self.l = l                      # nodes per level
        self.L = options.N              # number of items
        self.N = options.c              # number of end competitors
        self.LL = self.L + self.N + 2   # number of items (including start/end markers)
        self.length = self.LL           # for test api
        self.M = np.zeros(self.n)       # memory trace
        self.t = options.t              # recall threshold
        self.shuffle_items()

        self.model = Sequential()
        self.model.add(Bidirectional(GRU(l, return_sequences=True, stateful=True),
                                     batch_input_shape=(1, 1, n)))
        # self.model.add(BatchNormalization())
        # self.model.add(Activation('tanh'))
        # self.model.add(Bidirectional(GRU(l, return_sequences=True, stateful=True)))
        self.model.add(Bidirectional(GRU(l, stateful=True)))
        # self.model.add(Activation('tanh'))
        # self.model.add(Bidirectional(GRU(l, stateful=True)))
        # self.model.add(Activation('tanh'))
        # self.model.add(LeakyReLU(alpha=0.2))
        # self.model.add(BatchNormalization())
        # self.model.add(LeakyReLU(alpha=0.2))
        # self.model.add(Activation('tanh'))
        self.model.add(Dense(n))
        # defaults: lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0
        adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
        self.model.compile(loss='cosine_proximity', optimizer=adam)  # mse, rmsprop
        self.initial_weights = self.model.get_weights()

    def __len__(self):
        return self.L + 2

    def shuffle_items(self):
        self.items = np.random.normal(0, 1.0 / self.n, (self.LL, self.n))  # items
        self.x = self.items[0]               # start token
        self.y = self.items[-(self.N + 1)]   # end token

    def reset(self, weights=None):
        # https://github.com/fchollet/keras/issues/341
        self.shuffle_items()
        if weights is None:
            weights = self.initial_weights
        weights = [np.random.permutation(w.flat).reshape(w.shape) for w in weights]
        self.model.set_weights(weights)

    def trial(self):
        for e in range(self.options.e):
            self.model.reset_states()
            self.model.fit(self.items.reshape(self.LL, 1, self.n),
                           np.roll(self.items, -1, axis=0),
                           nb_epoch=1, batch_size=1, shuffle=False, verbose=0)

    def deblur(self, a, j=0):
        opts = self.items[j:]
        d = opts.dot(a)
        i = np.argmax(d)
        t = math.sqrt(a.dot(opts[i]) ** 2)
        if t < self.t:
            return None, -1
        return opts[i], i + j

    def probe(self, f, j):
        g_ = self.model.predict(f.reshape(1, 1, self.n))[0]
        if self.options.oi:
            self.model.fit(self.items[j - 1].reshape(1, 1, self.n),
                           g_.reshape(1, self.n),
                           nb_epoch=1, batch_size=1, shuffle=False, verbose=0)
            g_ = self.model.predict(f.reshape(1, 1, self.n))[0]
        return g_
class s_lstm(object):

    def __init__(self, layers, input_size=2, output_size=2, batch_size=1, seq_size=1):
        self.model = Sequential()
        self.n_layers = len(layers)
        self.batch_size = batch_size
        self.seq_size = seq_size
        self.input_size = input_size
        self.output_size = output_size
        self.input_shape = (self.batch_size, seq_size, self.input_size)
        if self.n_layers < 1:
            raise ValueError('you done f****d up now! n_layers must be >= 1')
        else:
            self.model.add(LSTM(layers[0], batch_input_shape=self.input_shape,
                                return_sequences=True, stateful=True))
            for i in range(1, self.n_layers):
                self.model.add(LSTM(layers[i], return_sequences=True, stateful=True))
        self.model.add(TimeDistributed(Dense(self.output_size)))
        self.model.compile(loss='mse', optimizer='rmsprop')

    def reset_states(self):
        self.model.reset_states()

    def train_on_batch(self, x, y):
        self.model.train_on_batch(x, y)

    def predict_on_batch(self, x):
        return self.model.predict_on_batch(x)

    def test_on_batch(self, x, y):
        return self.model.test_on_batch(x, y)

    def static_test_on_batch(self, x, y):
        old_states = self.get_states()
        ret = self.test_on_batch(x, y)
        self.set_states(old_states)
        return ret

    def static_predict_on_batch(self, x):
        old_states = self.get_states()
        ret = self.predict_on_batch(x)
        self.set_states(old_states)
        return ret

    def get_states(self):
        n_layers = len(self.model.layers) - 1
        states = [None] * n_layers
        for i in range(n_layers):
            temp = [None] * 2
            for j in range(2):
                temp[j] = K.get_value(self.model.layers[i].states[j])
            states[i] = temp
        return states

    def set_states(self, states):
        n_layers = len(self.model.layers) - 1
        for i in range(n_layers):
            for j in range(2):
                K.set_value(self.model.layers[i].states[j], states[i][j])
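# A brief, hedged usage sketch of the state-preserving helpers above:
# static_predict_on_batch lets the caller peek one step ahead without disturbing
# the LSTM's running state. Shapes and names below are illustrative assumptions,
# following the (batch_size, seq_size, input_size) layout used by the class.
import numpy as np

net = s_lstm(layers=[16, 16], input_size=2, output_size=2, batch_size=1, seq_size=1)
x = np.random.rand(1, 1, 2)
y = np.random.rand(1, 1, 2)

net.train_on_batch(x, y)                  # advances the internal states
peek = net.static_predict_on_batch(x)     # prediction that leaves the states untouched
net.reset_states()                        # start a fresh sequence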
def _runner(layer_class): """ All the recurrent layers share the same interface, so we can run through them with a single function. """ for ret_seq in [True, False]: layer = layer_class(output_dim, return_sequences=ret_seq, weights=None, input_shape=(timesteps, embedding_dim)) layer.input = K.variable( np.ones((nb_samples, timesteps, embedding_dim))) layer.get_config() for train in [True, False]: out = K.eval(layer.get_output(train)) # Make sure the output has the desired shape if ret_seq: assert (out.shape == (nb_samples, timesteps, output_dim)) else: assert (out.shape == (nb_samples, output_dim)) mask = layer.get_output_mask(train) # check statefulness model = Sequential() model.add( embeddings.Embedding(embedding_num, embedding_dim, mask_zero=True, input_length=timesteps, batch_input_shape=(nb_samples, timesteps))) layer = layer_class(output_dim, return_sequences=False, stateful=True, weights=None) model.add(layer) model.compile(optimizer='sgd', loss='mse') out1 = model.predict(np.ones((nb_samples, timesteps))) assert (out1.shape == (nb_samples, output_dim)) # train once so that the states change model.train_on_batch(np.ones((nb_samples, timesteps)), np.ones((nb_samples, output_dim))) out2 = model.predict(np.ones((nb_samples, timesteps))) # if the state is not reset, output should be different assert (out1.max() != out2.max()) # check that output changes after states are reset # (even though the model itself didn't change) layer.reset_states() out3 = model.predict(np.ones((nb_samples, timesteps))) assert (out2.max() != out3.max()) # check that container-level reset_states() works model.reset_states() out4 = model.predict(np.ones((nb_samples, timesteps))) assert_allclose(out3, out4, atol=1e-5) # check that the call to `predict` updated the states out5 = model.predict(np.ones((nb_samples, timesteps))) assert (out4.max() != out5.max()) # Check masking layer.reset_states() left_padded_input = np.ones((nb_samples, timesteps)) left_padded_input[0, :1] = 0 left_padded_input[1, :2] = 0 left_padded_input[2, :3] = 0 out6 = model.predict(left_padded_input) layer.reset_states() right_padded_input = np.ones((nb_samples, timesteps)) right_padded_input[0, -1:] = 0 right_padded_input[1, -2:] = 0 right_padded_input[2, -3:] = 0 out7 = model.predict(right_padded_input) assert_allclose(out7, out6, atol=1e-5)
def train(self, givenDataset, givenLabels, args = {}): dataset = {} labels = {} if 'epoch' not in args: args['epoch'] = 5 if 'LSTM' not in args: args['LSTM'] = [128, 256, 512] elif type(args['LSTM']) != list: args['LSTM'] = [args['LSTM']] if 'dense' not in args: args['dense'] = [] elif type(args['dense']) != list: args['dense'] = [args['dense']] if 'batch' not in args: args['batch'] = 16 if 'lr' not in args: args['lr'] = 0.0001 if 'stateful' not in args: args['stateful'] = False if 'cut_timesteps' not in args: args['cut_timesteps'] = False #remove any zero-size LSTM/dense layers for arr in [args['LSTM'], args['dense']]: while 0 in arr: arr.remove(0) features = givenDataset['train'].shape[2] time_steps = givenDataset['train'].shape[1] for kind in ['warm', 'train', 'test']: #reformat only the labels first labels[kind] = givenLabels[kind].astype(np.float32) #shape of labels is (samples, targets) self.num_targets = labels[kind].shape[1] #_, labels[kind] = self.reformat(givenDataset[kind], givenLabels[kind], features, time_steps, num_labels) #reformat the dataset for LSTM format of [samples, time steps, features] dataset[kind] = np.reshape(givenDataset[kind], (-1, givenDataset[kind].shape[1], givenDataset[kind].shape[2])) if args['cut_timesteps']: print("Warning! Cutting all previous timesteps") dataset[kind] = np.reshape(dataset[kind][:, -1, :], (-1, 1, givenDataset[kind].shape[2])) #cut except the latest time step time_steps = 1 print('%s dataset with initial shape %s and resulting shape %s with labels %s' % (kind, givenDataset[kind].shape, dataset[kind].shape, labels[kind].shape)) model = Sequential() for i, layer in enumerate(args['LSTM']): ret_seq=(i< (len(args['LSTM'])-1)) name = str(layer)+ '_' + str(i+1) + ('_ret_seq' if ret_seq else '') if i==0: model.add(LSTM(layer, return_sequences=ret_seq, stateful=args['stateful'], batch_input_shape=(args['batch'], time_steps, features), name=name ) ) model.add(Dropout(0.1, name='0.1')) else: model.add(LSTM(layer, return_sequences=ret_seq, stateful=args['stateful'], name=name) ) #model.add(Activation('relu')) print("Adding LSTM Layer of size %d." % layer) for dense in args['dense']: model.add(Dense(dense)) #model.add(Activation('relu')) model.add(Dense(self.num_targets, name=str(self.num_targets))) model.add(Activation('linear', name='linear')) #model.add(PReLU(alpha_initializer='zeros', alpha_regularizer=None, alpha_constraint=None, shared_axes=None)) opt = Adam(args['lr']) model.compile(loss='mean_squared_error', optimizer=opt) self.plotModel(model) val = (dataset['test'], labels['test']) if not args['stateful']: model.fit(dataset['train'], labels['train'], epochs=args['epoch'], batch_size=args['batch'], validation_data=val, verbose=1, shuffle=False) else: for i in range(args['epoch']): model.predict(dataset['warm'], batch_size=args['batch']) #predict so that fitting starts with a state model.fit(dataset['train'], labels['train'], epochs=1, batch_size=args['batch'], validation_data=val, verbose=1, shuffle=False) model.reset_states() # make predictions self.prediction = {} predictionBatch = args['batch'] model.predict(dataset['warm'], batch_size=predictionBatch) self.prediction['train'] = model.predict(dataset['train'], batch_size=predictionBatch) self.prediction['test'] = model.predict(dataset['test'], batch_size=predictionBatch) model.reset_states() print(self.prediction['test'].shape) self.scorePrediction(self.prediction, labels, 'train', self.num_targets) self.scorePrediction(self.prediction, labels, 'test', self.num_targets)
def test_recurrent_convolutional(): nb_row = 3 nb_col = 3 nb_filter = 5 nb_samples = 2 input_channel = 2 input_nb_row = 5 input_nb_col = 5 sequence_len = 2 for dim_ordering in ['th', 'tf']: if dim_ordering == 'th': input = np.random.rand(nb_samples, sequence_len, input_channel, input_nb_row, input_nb_col) else: # tf input = np.random.rand(nb_samples, sequence_len, input_nb_row, input_nb_col, input_channel) for return_sequences in [True, False]: # test for ouptput shape: output = layer_test(convolutional_recurrent.ConvLSTM2D, kwargs={ 'dim_ordering': dim_ordering, 'return_sequences': return_sequences, 'nb_filter': nb_filter, 'nb_row': nb_row, 'nb_col': nb_col, 'border_mode': "same" }, input_shape=input.shape) output_shape = [nb_samples, input_nb_row, input_nb_col] if dim_ordering == 'th': output_shape.insert(1, nb_filter) else: output_shape.insert(3, nb_filter) if return_sequences: output_shape.insert(1, sequence_len) assert output.shape == tuple(output_shape) # No need to check statefulness for both if dim_ordering == 'th' or return_sequences: continue # Tests for statefulness model = Sequential() kwargs = { 'dim_ordering': dim_ordering, 'return_sequences': return_sequences, 'nb_filter': nb_filter, 'nb_row': nb_row, 'nb_col': nb_col, 'stateful': True, 'batch_input_shape': input.shape, 'border_mode': "same" } layer = convolutional_recurrent.ConvLSTM2D(**kwargs) model.add(layer) model.compile(optimizer='sgd', loss='mse') out1 = model.predict(np.ones_like(input)) assert (out1.shape == tuple(output_shape)) # train once so that the states change model.train_on_batch(np.ones_like(input), np.ones_like(output)) out2 = model.predict(np.ones_like(input)) # if the state is not reset, output should be different assert (out1.max() != out2.max()) # check that output changes after states are reset # (even though the model itself didn't change) layer.reset_states() out3 = model.predict(np.ones_like(input)) assert (out2.max() != out3.max()) # check that container-level reset_states() works model.reset_states() out4 = model.predict(np.ones_like(input)) assert_allclose(out3, out4, atol=1e-5) # check that the call to `predict` updated the states out5 = model.predict(np.ones_like(input)) assert (out4.max() != out5.max()) # check regularizers kwargs = { 'dim_ordering': dim_ordering, 'return_sequences': return_sequences, 'nb_filter': nb_filter, 'nb_row': nb_row, 'nb_col': nb_col, 'stateful': True, 'batch_input_shape': input.shape, 'W_regularizer': regularizers.WeightRegularizer(l1=0.01), 'U_regularizer': regularizers.WeightRegularizer(l1=0.01), 'b_regularizer': 'l2', 'border_mode': "same" } layer = convolutional_recurrent.ConvLSTM2D(**kwargs) layer.build(input.shape) output = layer(K.variable(np.ones(input.shape))) K.eval(output) # check dropout layer_test(convolutional_recurrent.ConvLSTM2D, kwargs={ 'dim_ordering': dim_ordering, 'return_sequences': return_sequences, 'nb_filter': nb_filter, 'nb_row': nb_row, 'nb_col': nb_col, 'border_mode': "same", 'dropout_W': 0.1, 'dropout_U': 0.1 }, input_shape=input.shape)
class CaptionModel(object):

    def __init__(self, image_len, caption_len, vocab_size, ifpool):
        # image feature length, such as fc2 or fc1
        self.image_len = image_len
        # max caption length
        self.caption_len = caption_len + 2  # add start word and end word
        # unique vocabulary size
        self.vocab_size = vocab_size
        self.ifpool = ifpool
        # save_path for checkpoint and tensorboard
        self.save_path = FLAGS.save_path
        # pooling feature shape
        self.pooling_shape = (7, 7, 512)
        self.conv_channel = 512
        # embedding_size (default = 128) word embedding size
        self.embedding_size = FLAGS.embedding_size
        self.image_embedding_size = FLAGS.image_embedding_size
        self.RNN_out_uints = FLAGS.RNN_out_units
        self.batch_size = FLAGS.batch_size
        self.inference_batch_size = FLAGS.inference_batch_size
        self.epochs = FLAGS.epochs
        self.num_RNN_layers = FLAGS.num_RNN_layers
        self.RNN = {'GRU': GRU, 'LSTM': LSTM, 'SimpleRNN': SimpleRNN}[FLAGS.RNN_category]

    def build_train_model(self):
        if self.ifpool:
            image_input = Input(shape=self.pooling_shape, name='image_input')
        else:
            image_input = Input(shape=(self.image_len, ), name='image_input')
        caption_input = Input(shape=(self.caption_len, ), name='caption_input')

        image_model = Sequential(name='image_model')
        if self.ifpool:
            image_model.add(Conv2D(self.conv_channel, kernel_size=3, strides=1,
                                   padding='same', activation='relu',
                                   input_shape=self.pooling_shape))
            image_model.add(Conv2D(self.conv_channel, kernel_size=3, strides=1,
                                   padding='same', activation='relu'))
            image_model.add(Conv2D(self.conv_channel, kernel_size=3, strides=1,
                                   padding='same', activation='relu'))
            image_model.add(GlobalAveragePooling2D())
            image_model.add(Dense(self.image_embedding_size * self.caption_len,
                                  activation='relu'))
        else:
            image_model.add(Dense(self.image_embedding_size * self.caption_len,
                                  activation='relu', input_shape=(self.image_len, )))
        image_model.add(Reshape((self.caption_len, self.image_embedding_size)))

        language_model = Sequential(name='language_model')
        language_model.add(Embedding(self.vocab_size, self.embedding_size,
                                     input_length=self.caption_len))

        image_embedding = image_model(image_input)
        caption_embedding = language_model(caption_input)
        RNN_input = Concatenate(axis=-1)([image_embedding, caption_embedding])

        for layer_idx in range(self.num_RNN_layers):
            if layer_idx == 0:
                locals()['RNN_output%s' % layer_idx] = self.RNN(
                    self.RNN_out_uints, name='RNN%s' % layer_idx,
                    return_sequences=True)(RNN_input)
            else:
                locals()['RNN_output%s' % layer_idx] = self.RNN(
                    self.RNN_out_uints, name='RNN%s' % layer_idx,
                    return_sequences=True)(locals()['RNN_output%s' % (layer_idx - 1)])
        caption_output = Dense(self.vocab_size, activation='softmax', name='output')(
            locals()['RNN_output%s' % (self.num_RNN_layers - 1)])

        self.model = Model([image_input, caption_input], caption_output)
        self.model.summary()
        self.model.compile(optimizer='rmsprop', metrics=['accuracy'],
                           loss='categorical_crossentropy')

    def generator(self, imageData, captionData):
        """Generator for train or val.
        Shuffle the training samples.
        For each image, we only choose a caption randomly from its 5 captions.
        Args:
            imageData: image feature
            captionData: several one-hot vectors for each image
        """
        sampleNum = imageData.shape[0]
        # train set and validation set have different steps_per_epoch, so compute it again
        steps_per_epoch = sampleNum // self.batch_size
        while 1:
            idx = np.arange(sampleNum)
            np.random.shuffle(idx)
            X1 = np.zeros((self.batch_size, self.image_len))
            X2 = np.zeros((self.batch_size, self.caption_len))
            Y = np.zeros((self.batch_size, self.caption_len, self.vocab_size))
            Y_end = np.zeros((self.batch_size, 1, self.vocab_size))
            Y_end[:, 0, 0] = 1  # The end flag for caption
            for i in range(steps_per_epoch):
                randidx = idx[i * self.batch_size:(i + 1) * self.batch_size]
                X1 = imageData[randidx]
                # list(...) keeps this working on Python 3, where map() is lazy
                Y = np.array(list(map(random.choice, [captionData[j] for j in randidx])))
                X2 = np.argmax(Y, axis=-1)
                yield ({'image_input': X1, 'caption_input': X2},
                       {'output': np.concatenate([Y[:, 1:], Y_end], axis=1)})

    def build_train_model_from_checkpoint(self, checkpoint):
        self.model = load_model(checkpoint)

    def train(self, X_train, Y_train, X_val, Y_val):
        train_num = len(Y_train)
        val_num = len(Y_val)
        steps_per_epoch = train_num // self.batch_size
        val_steps = val_num // self.batch_size
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', verbose=1, factor=0.1,
                                      patience=5, min_lr=0.00001)
        save_model = ModelCheckpoint(
            self.save_path + '/checkpoint/weights.{epoch:03d}-{val_acc:.3f}.hdf5',
            monitor='loss', verbose=1, save_best_only=True)
        tensorboard = TensorBoard(log_dir=self.save_path + '/tf_logs')
        self.model.fit_generator(self.generator(X_train, Y_train),
                                 steps_per_epoch=steps_per_epoch,
                                 validation_data=self.generator(X_val, Y_val),
                                 validation_steps=val_steps,
                                 callbacks=[reduce_lr, save_model, tensorboard],
                                 epochs=self.epochs)

    def build_inference_model(self, checkpoint, beam_search=False):
        if beam_search:
            self.inference_batch_size = 1
        model = load_model(checkpoint)

        # image model
        self.image_model = Sequential()
        if self.ifpool:
            self.image_model.add(Conv2D(self.conv_channel, trainable=False,
                                        kernel_size=3, strides=1, padding='same',
                                        activation='relu',
                                        input_shape=self.pooling_shape))
            self.image_model.add(Conv2D(self.conv_channel, trainable=False,
                                        kernel_size=3, strides=1, padding='same',
                                        activation='relu'))
            self.image_model.add(Conv2D(self.conv_channel, trainable=False,
                                        kernel_size=3, strides=1, padding='same',
                                        activation='relu'))
            self.image_model.add(GlobalAveragePooling2D())
            self.image_model.add(Dense(self.image_embedding_size * self.caption_len,
                                       trainable=False, activation='relu'))
        else:
            self.image_model.add(Dense(self.image_embedding_size * self.caption_len,
                                       trainable=False, activation='relu',
                                       input_shape=(self.image_len, )))
        self.image_model.add(Reshape((self.caption_len, self.image_embedding_size)))
        # copy weights from loaded model
        image_layer = model.get_layer('sequential_1')
        assert image_layer is not None, 'There is no layer named sequential_1'
        self.image_model.set_weights(image_layer.get_weights())

        # language model
        self.language_model = Sequential()
        self.language_model.add(Embedding(self.vocab_size, self.embedding_size,
                                          trainable=False, input_length=1))
        # set input_length=1 so as to forward one step at a time
        # copy weights from loaded model
        language_layer = model.get_layer('sequential_2')
        self.language_model.set_weights(language_layer.get_weights())

        # caption model
        # Forward an image/word embedding to get the next word
        # Use a stateful RNN
        self.caption_model = Sequential()
        for idx in range(self.num_RNN_layers):
            if idx == 0:
                self.caption_model.add(self.RNN(
                    self.RNN_out_uints, return_sequences=True, trainable=False,
                    stateful=True,
                    batch_input_shape=(self.inference_batch_size, 1,
                                       self.embedding_size + self.image_embedding_size)))
            else:
                self.caption_model.add(self.RNN(self.RNN_out_uints,
                                                return_sequences=True,
                                                trainable=False, stateful=True))
        self.caption_model.add(Dense(self.vocab_size, activation='softmax',
                                     trainable=False))
        caption_weigts = []
        # copy the last (num_RNN_layers + 1) layers' weights
        for layer in model.layers[-(self.num_RNN_layers + 1):]:
            caption_weigts.extend(layer.get_weights())
        self.caption_model.set_weights(caption_weigts)
        del model

    def get_image_output(self, test_batch):
        return self.image_model.predict_on_batch(test_batch)

    def inference(self, X_test):
        """Inference using the greedy method."""
        test_num = X_test.shape[0]
        assert test_num % self.inference_batch_size == 0, \
            'inference_batch_size should divide Test sample number'
        steps_per_epoch = test_num // self.inference_batch_size
        result = np.zeros((0, self.caption_len))
        for i in range(steps_per_epoch):
            test_batch = X_test[i * self.inference_batch_size:
                                (i + 1) * self.inference_batch_size]
            char = np.zeros((self.inference_batch_size, self.caption_len))
            image_output = self.get_image_output(test_batch)
            self.caption_model.reset_states()
            predict = self.caption_model.predict_on_batch(image_output)
            for j in range(self.caption_len):
                char[:, j] = np.argmax(predict, axis=-1).squeeze()
                language_output = self.language_model.predict_on_batch(char[:, j])
                predict = self.caption_model.predict_on_batch(language_output)
            result = np.concatenate([result, char], axis=0)
        return result

    def inference_step(self, image_embedding, sentence_feed):
        """Used for beam search.
        Get the next predicted word and its probability.
        Args:
            image_embedding: the image_model's output
            sentence_feed: the indices of the partial caption words
        """
        self.caption_model.reset_states()
        for step, word_idx in enumerate(sentence_feed):
            word = np.array([word_idx])
            language_output = self.language_model.predict_on_batch(word[None, ...])
            caption_input = np.concatenate(
                [image_embedding[0, step][None, None, ...], language_output], axis=-1)
            next_predict = self.caption_model.predict_on_batch(caption_input)
        return next_predict
def eval(self, grn):
    seed = np.random.randint(1e5)
    model = Sequential()
    batch_input_shape = (self.batch_size, self.x_train.shape[1], self.x_train.shape[2])
    start_time = datetime.now()
    if self.model_type == LSTM or self.model_type == SimpleRNN:
        np.random.seed(int(np.round(grn.identifiers[0] * 100)))
        layer = self.model_type(self.nout, stateful=self.stateful,
                                batch_input_shape=batch_input_shape)
        model.add(layer)
    else:
        np.random.seed(self.seed)
        layer = self.model_type(str(grn), stateful=self.stateful,
                                batch_input_shape=batch_input_shape)
        model.add(layer)
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    error = []
    if self.learn:
        for i in range(self.epochs):
            history = model.fit(self.x_train, self.y_train, batch_size=self.batch_size,
                                epochs=1, verbose=0, shuffle=False)
            model.reset_states()
            with open(self.logfile, 'a') as f:
                for l in range(len(history.history['loss'])):
                    train_fit = history.history['loss'][l]
                    error += [train_fit]
                    f.write('L,%e,%d,%s,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%e\n' % (
                        (datetime.now() - start_time).total_seconds(), self.seed,
                        self.model_type.__name__, self.epochs, self.lamarckian,
                        self.unsupervised, self.stateful, self.ntrain, self.ntest,
                        self.shift, self.lag, self.generation, i, train_fit))
        # lamarckian evolution
        if self.lamarckian:
            layer.set_learned_genes(grn)

    start_error = error[0]
    end_error = error[-1]
    fit = start_error - end_error
    total_error = np.sum(np.abs(error))

    if not self.unsupervised:
        # predict and return unscaled difference
        ntest = len(self.y_test)
        yhat = model.predict(self.x_test, batch_size=self.batch_size, verbose=0)
        final = np.concatenate((
            np.reshape(self.x_test, (self.x_test.shape[0], self.x_test.shape[2])),
            np.reshape(yhat, (ntest, 1))), axis=1)
        inverted = self.scaler.inverse_transform(final)
        yhatp = inverted[:, -1]
        target = self.df['target'].tail(ntest)
        fit = np.sqrt(mean_squared_error(yhatp, target))
        total_error = np.sum(np.abs(yhatp - target))
        if np.isnan(fit):
            fit = -1e10

    with open(self.logfile, 'a') as f:
        f.write('M,%e,%d,%s,%d,%d,%d,%d,%d,%d,%d,%d,%d,%e,%e,%e,%e\n' % (
            (datetime.now() - start_time).total_seconds(), self.seed,
            self.model_type.__name__, self.epochs, self.lamarckian,
            self.unsupervised, self.stateful, self.ntrain, self.ntest,
            self.shift, self.lag, self.generation,
            start_error, end_error, total_error, fit))

    del model
    K.clear_session()
    np.random.seed(seed)
    return fit
def modeling(univariate, look_back, room_name):
    split = 0.9
    scaler = MinMaxScaler(feature_range=(0, 1))
    univariate = scaler.fit_transform(univariate)
    univariate = univariate.tolist()
    ts_univariate = series_to_supervised(univariate, look_back, look_back)
    ts_univariate = np.array(ts_univariate)
    train_X, train_y, test_X, test_y = split_test_train(ts_univariate, split, look_back)

    # reshape input to be [samples, time steps, features]
    train_X = np.reshape(train_X, (train_X.shape[0], 1, train_X.shape[1]))
    test_X = np.reshape(test_X, (test_X.shape[0], 1, test_X.shape[1]))

    # Model
    model = Sequential()
    # LSTM
    model.add(LSTM(look_back + 1,
                   batch_input_shape=(look_back, train_X.shape[1], train_X.shape[2]),
                   stateful=True,
                   return_sequences=True))
    model.add(LSTM(look_back, return_sequences=True, stateful=True))
    model.add(LSTM(look_back, stateful=True))
    # model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())
    model.add(Dense(look_back))  # train_y.shape[1]
    model.add(Activation('relu'))
    model.compile(loss='mean_squared_error', optimizer='rmsprop')

    # fit network
    for i in range(100):
        print("Epoch:", i + 1)
        model.fit(np.concatenate((train_X, test_X), axis=0),
                  np.concatenate((train_y, test_y), axis=0),
                  epochs=1, batch_size=look_back, verbose=1, shuffle=False)
        model.reset_states()

    model_json = model.to_json()
    with open("data/models/" + room_name + ".json", "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    model.save_weights("data/models/" + room_name + ".h5")
    print("Saved model to disk")

    # make predictions
    trainPredict = model.predict(train_X, batch_size=look_back)
    testPredict = model.predict(test_X, batch_size=look_back)
    # invert predictions
    trainPredict = scaler.inverse_transform(trainPredict)
    train_y = scaler.inverse_transform(train_y)
    testPredict = scaler.inverse_transform(testPredict)
    test_y = scaler.inverse_transform(test_y)

    # calculate root mean squared error
    train_score, test_score = [], []
    for i in range(0, len(train_y)):
        train_score.append(math.sqrt(mean_squared_error(train_y[i, :], trainPredict[i, :])))
    for i in range(0, len(test_y)):
        test_score.append(math.sqrt(mean_squared_error(test_y[i, :], testPredict[i, :])))
    print(room_name)
    print('Train Score: %.4f RMSE' % (np.mean(train_score)))
    print('Test Score: %.4f RMSE' % (np.mean(test_score)))
def model_train_for_column(norm_set):
    # load the dataset
    dataframe = pandas.DataFrame(norm_set)
    dataset = dataframe.values
    dataset = dataset.astype('float32')
    # normalize the dataset
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)
    # split into train and test sets
    train_size = int(len(dataset) * 0.5)
    test_size = len(dataset) - train_size
    train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :]
    # reshape into X=t and Y=t+1
    look_back = 3
    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)
    # reshape input to be [samples, time steps, features]
    trainX = numpy.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1))
    testX = numpy.reshape(testX, (testX.shape[0], testX.shape[1], 1))

    # create and fit the LSTM network
    batch_size = 1
    model = Sequential()
    model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1),
                   stateful=True, return_sequences=True))
    model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    for i in range(500):  # 500
        model.fit(trainX, trainY, nb_epoch=1, batch_size=batch_size,
                  verbose=2, shuffle=False)
        model.reset_states()

    # make predictions
    trainPredict = model.predict(trainX, batch_size=batch_size)
    model.reset_states()
    testPredict = model.predict(testX, batch_size=batch_size)
    # invert predictions
    trainPredict = scaler.inverse_transform(trainPredict)
    trainY = scaler.inverse_transform([trainY])
    testPredict = scaler.inverse_transform(testPredict)
    testY = scaler.inverse_transform([testY])
    # calculate root mean squared error
    trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
    print('Train Score: %.2f RMSE' % (trainScore))
    testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
    print('Test Score: %.2f RMSE' % (testScore))
    # return scaler.inverse_transform(dataset), trainPredict, testPredict

    # shift train predictions for plotting
    trainPredictPlot = numpy.empty_like(dataset)
    trainPredictPlot[:, :] = numpy.nan
    trainPredictPlot[look_back:len(trainPredict) + look_back, :] = trainPredict
    # shift test predictions for plotting
    testPredictPlot = numpy.empty_like(dataset)
    testPredictPlot[:, :] = numpy.nan
    testPredictPlot[len(trainPredict) + (look_back * 2) + 1:len(dataset) - 1, :] = testPredict
    return scaler.inverse_transform(dataset), trainPredictPlot, testPredictPlot
model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1),
               stateful=True, return_sequences=True))
model.add(LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=True))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
for i in range(100):
    model.fit(trainX, trainY, nb_epoch=1, batch_size=batch_size,
              verbose=2, shuffle=False)
    model.reset_states()

# Estimate model performance
trainScore = model.evaluate(trainX, trainY, batch_size=batch_size, verbose=0)
model.reset_states()
print('Train Score: ', scaler.inverse_transform(numpy.array([[trainScore]])))
testScore = model.evaluate(testX, testY, batch_size=batch_size, verbose=0)
model.reset_states()
print('Test Score: ', scaler.inverse_transform(numpy.array([[testScore]])))

# generate predictions for training
trainPredict = model.predict(trainX, batch_size=batch_size)
model.reset_states()
testPredict = model.predict(testX, batch_size=batch_size)
model.reset_states()

# shift train predictions for plotting
trainPredictPlot = numpy.empty_like(dataset)
trainPredictPlot[:, :] = numpy.nan
def _runner(layer_class): """ All the recurrent layers share the same interface, so we can run through them with a single function. """ # check return_sequences layer_test(layer_class, kwargs={'output_dim': output_dim, 'return_sequences': True}, input_shape=(3, 2, 3)) # check dropout layer_test(layer_class, kwargs={'output_dim': output_dim, 'dropout_U': 0.1, 'dropout_W': 0.1}, input_shape=(3, 2, 3)) # check implementation modes for mode in ['cpu', 'mem', 'gpu']: layer_test(layer_class, kwargs={'output_dim': output_dim, 'consume_less': mode}, input_shape=(3, 2, 3)) # check statefulness model = Sequential() model.add(embeddings.Embedding(embedding_num, embedding_dim, mask_zero=True, input_length=timesteps, batch_input_shape=(nb_samples, timesteps))) layer = layer_class(output_dim, return_sequences=False, stateful=True, weights=None) model.add(layer) model.compile(optimizer='sgd', loss='mse') out1 = model.predict(np.ones((nb_samples, timesteps))) assert(out1.shape == (nb_samples, output_dim)) # train once so that the states change model.train_on_batch(np.ones((nb_samples, timesteps)), np.ones((nb_samples, output_dim))) out2 = model.predict(np.ones((nb_samples, timesteps))) # if the state is not reset, output should be different assert(out1.max() != out2.max()) # check that output changes after states are reset # (even though the model itself didn't change) layer.reset_states() out3 = model.predict(np.ones((nb_samples, timesteps))) assert(out2.max() != out3.max()) # check that container-level reset_states() works model.reset_states() out4 = model.predict(np.ones((nb_samples, timesteps))) assert_allclose(out3, out4, atol=1e-5) # check that the call to `predict` updated the states out5 = model.predict(np.ones((nb_samples, timesteps))) assert(out4.max() != out5.max()) # Check masking layer.reset_states() left_padded_input = np.ones((nb_samples, timesteps)) left_padded_input[0, :1] = 0 left_padded_input[1, :2] = 0 left_padded_input[2, :3] = 0 out6 = model.predict(left_padded_input) layer.reset_states() right_padded_input = np.ones((nb_samples, timesteps)) right_padded_input[0, -1:] = 0 right_padded_input[1, -2:] = 0 right_padded_input[2, -3:] = 0 out7 = model.predict(right_padded_input) assert_allclose(out7, out6, atol=1e-5) # check regularizers layer = layer_class(output_dim, return_sequences=False, weights=None, batch_input_shape=(nb_samples, timesteps, embedding_dim), W_regularizer=regularizers.WeightRegularizer(l1=0.01), U_regularizer=regularizers.WeightRegularizer(l1=0.01), b_regularizer='l2') shape = (nb_samples, timesteps, embedding_dim) layer.set_input(K.variable(np.ones(shape)), shape=shape) K.eval(layer.output)
multi_model.add(LSTM(4, batch_input_shape=(btc_size, train_X.shape[1], train_X.shape[2]),
                     stateful=True))
multi_model.add(Dense(1))
multi_model.compile(loss='mse', optimizer='adam')

for i in range(int(train_X.shape[0] / btc_size)):
    this_X = train_X[(i * btc_size):((i + 1) * btc_size)][:][:]
    this_y = train_y[(i * btc_size):((i + 1) * btc_size)]
    multi_history = multi_model.fit(this_X, this_y, epochs=10, batch_size=btc_size,
                                    verbose=0, shuffle=False)
    multi_model.reset_states()

# Make predictions:
y_pred = [test_X.shape[0]]
for i in range(int(test_X.shape[0] / btc_size)):
    this_X = test_X[(i * btc_size):((i + 1) * btc_size)][:][:]
    this_pred = multi_model.predict(this_X, batch_size=btc_size)
    y_pred[(i * btc_size):((i + 1) * btc_size)] = this_pred

test_X_nn = test_X.reshape((test_X.shape[0], test_X.shape[2]))
# Invert scaling for forecast
inv_y_pred = np.concatenate((y_pred, test_X_nn[:, 1:]), axis=1)
inv_y_pred = scaler.inverse_transform(inv_y_pred)
inv_y_pred = inv_y_pred[:, 0]
# Invert scaling for actual
test_y_nn = test_y.reshape((len(test_y), 1))
def _runner(layer_class): """ All the recurrent layers share the same interface, so we can run through them with a single function. """ for ret_seq in [True, False]: layer = layer_class(output_dim, return_sequences=ret_seq, weights=None, input_shape=(timesteps, embedding_dim)) layer.input = K.variable(np.ones((nb_samples, timesteps, embedding_dim))) layer.get_config() for train in [True, False]: out = K.eval(layer.get_output(train)) # Make sure the output has the desired shape if ret_seq: assert out.shape == (nb_samples, timesteps, output_dim) else: assert out.shape == (nb_samples, output_dim) mask = layer.get_output_mask(train) # check statefulness model = Sequential() model.add( embeddings.Embedding( embedding_num, embedding_dim, mask_zero=True, input_length=timesteps, batch_input_shape=(nb_samples, timesteps), ) ) layer = layer_class(output_dim, return_sequences=False, stateful=True, weights=None) model.add(layer) model.compile(optimizer="sgd", loss="mse") out1 = model.predict(np.ones((nb_samples, timesteps))) assert out1.shape == (nb_samples, output_dim) # train once so that the states change model.train_on_batch(np.ones((nb_samples, timesteps)), np.ones((nb_samples, output_dim))) out2 = model.predict(np.ones((nb_samples, timesteps))) # if the state is not reset, output should be different assert out1.max() != out2.max() # check that output changes after states are reset # (even though the model itself didn't change) layer.reset_states() out3 = model.predict(np.ones((nb_samples, timesteps))) assert out2.max() != out3.max() # check that container-level reset_states() works model.reset_states() out4 = model.predict(np.ones((nb_samples, timesteps))) assert_allclose(out3, out4, atol=1e-5) # check that the call to `predict` updated the states out5 = model.predict(np.ones((nb_samples, timesteps))) assert out4.max() != out5.max() # Check masking layer.reset_states() left_padded_input = np.ones((nb_samples, timesteps)) left_padded_input[0, :1] = 0 left_padded_input[1, :2] = 0 left_padded_input[2, :3] = 0 out6 = model.predict(left_padded_input) layer.reset_states() right_padded_input = np.ones((nb_samples, timesteps)) right_padded_input[0, -1:] = 0 right_padded_input[1, -2:] = 0 right_padded_input[2, -3:] = 0 out7 = model.predict(right_padded_input) assert_allclose(out7, out6, atol=1e-5)
def hyper_build_model(self, space, predict, custom_batch_size=None):
    conf = self.conf
    model_conf = conf['model']
    rnn_size = model_conf['rnn_size']
    rnn_type = model_conf['rnn_type']
    regularization = model_conf['regularization']
    dropout_prob = model_conf['dropout_prob']
    length = model_conf['length']
    pred_length = model_conf['pred_length']
    # skip = model_conf['skip']
    stateful = model_conf['stateful']
    return_sequences = model_conf['return_sequences']
    # model_conf['output_activation']
    output_activation = conf['data']['target'].activation
    num_signals = conf['data']['num_signals']

    batch_size = self.conf['training']['batch_size']
    if predict:
        batch_size = self.conf['model']['pred_batch_size']
        # so we can predict with one time point at a time!
        if return_sequences:
            length = pred_length
        else:
            length = 1
    if custom_batch_size is not None:
        batch_size = custom_batch_size

    if rnn_type == 'LSTM':
        rnn_model = LSTM
    elif rnn_type == 'SimpleRNN':
        rnn_model = SimpleRNN
    else:
        print('Unknown Model Type, exiting.')
        exit(1)

    batch_input_shape = (batch_size, length, num_signals)
    model = Sequential()
    for _ in range(model_conf['rnn_layers']):
        model.add(rnn_model(rnn_size,
                            return_sequences=return_sequences,
                            batch_input_shape=batch_input_shape,
                            stateful=stateful,
                            kernel_regularizer=l2(regularization),
                            recurrent_regularizer=l2(regularization),
                            bias_regularizer=l2(regularization),
                            dropout=dropout_prob,
                            recurrent_dropout=dropout_prob))
        model.add(Dropout(space['Dropout']))
    if 'real_time_prediction' in model_conf.keys() and model_conf['real_time_prediction'] == True:
        model.add(Dense(1, activation=output_activation))
    elif return_sequences == False:
        model.add(Dense(1, activation=output_activation))
    else:
        model.add(TimeDistributed(Dense(1, activation=output_activation)))
    model.reset_states()
    return model
def train(self, givenDataset, givenLabels, args={}, loadModel=None): dataset = {} labels = {} if 'epoch' not in args: args['epoch'] = 5 if 'hidden' not in args: args['hidden'] = [2048, 1024, 512, 50] elif type(args['hidden']) != list: args['hidden'] = [args['hidden']] if 'batch' not in args: args['batch'] = 16 if 'lr' not in args: args['lr'] = 0.0001 #remove any zero-size LSTM/dense layers for arr in [args['hidden']]: while 0 in arr: arr.remove(0) features = givenDataset['train'].shape[2] time_steps = givenDataset['train'].shape[1] for kind in ['warm', 'train', 'test']: #reformat only the labels first labels[kind] = givenLabels[kind].astype( np.float32) #shape of labels is (samples, targets) self.num_targets = labels[kind].shape[1] dataset[kind] = np.reshape(givenDataset[kind], (-1, givenDataset[kind].shape[1] * givenDataset[kind].shape[2])) print( '%s dataset with initial shape %s and resulting shape %s with labels %s' % (kind, givenDataset[kind].shape, dataset[kind].shape, labels[kind].shape)) history = {} if loadModel is not None: model = self.loadModelKeras(loadModel) else: model = Sequential() for i, layer in enumerate(args['hidden']): if i == 0: model.add( Dense(layer, batch_input_shape=(args['batch'], time_steps * features), name=str(layer))) model.add(Dropout(0.1, name='0.1')) else: model.add(Dense(layer, name=str(layer))) model.add(Activation('relu', name='relu_' + str(i + 1))) print("Adding Dense Layer of size %d." % layer) model.add(Dense(self.num_targets, name='1')) model.add(Activation('linear', name='linear')) #model.add(PReLU(alpha_initializer='zeros', alpha_regularizer=None, alpha_constraint=None, shared_axes=None)) opt = Adam(args['lr']) model.compile(loss='mean_squared_error', optimizer=opt) self.plotModel(model) for i in range(args['epoch']): epochHist = model.fit(dataset['train'], labels['train'], validation_data=(dataset['test'], labels['test']), epochs=1, batch_size=args['batch'], verbose=1, shuffle=False) prediction = {} prediction['test'] = model.predict(dataset['test'], batch_size=args['batch']) evalHist = self.scorePrediction(prediction, labels, 'test', self.num_targets)[0] for key in evalHist: #temporary workaround evalHist[key] = evalHist[key]['test'] for scoresDict in [epochHist.history, evalHist]: for key in scoresDict: if key not in history: history[key] = [] if type(scoresDict[key]) == list: history[key].extend(scoresDict[key]) else: history[key].append(scoresDict[key]) # make predictions self.prediction = {} predictionBatch = args['batch'] model.predict(dataset['warm'], batch_size=predictionBatch) self.prediction['train'] = model.predict(dataset['train'], batch_size=predictionBatch) self.prediction['test'] = model.predict(dataset['test'], batch_size=predictionBatch) model.reset_states() print(self.prediction['test'].shape) self.scorePrediction(self.prediction, labels, 'train', self.num_targets) self.scorePrediction(self.prediction, labels, 'test', self.num_targets) self.model = model return history
def generate_lstm_gmm(seq, maxlen=1, bs=500, ep=2, output_iterations=10,
                      num_mixture_components=3):
    # seq is a single sample, in the format (timesteps, features)!
    # TODO: expand code to support multiple samples, fed into model together as a batch

    # Cut the timeseries data (variable name 'seq') into semi-redundant sequence chunks of maxlen
    X = []
    y = []
    for i in range(0, len(seq) - maxlen):
        X.append(seq[i:i + maxlen])
        y.append(seq[i + maxlen])

    dim = len((X[0][0]))
    print("sequence chunks:", len(X))
    print("chunk width:", len(X[0]))
    print("vector dimension:", dim)
    print("number of mixture components:", num_mixture_components)
    print("batch size:", bs)

    X = np.array(X)
    y = np.array(y)

    # build the model: 2 stacked LSTM
    print('Build model...')
    model = Sequential()
    model.reset_states()
    model.add(LSTM((dim + 2) * num_mixture_components, return_sequences=False,
                   input_shape=(maxlen, dim)))
    model.add(Dense((dim + 2) * num_mixture_components))
    model.add(GMMActivation(num_mixture_components))
    model.compile(loss=gmm_loss, optimizer=RMSprop(lr=0.001))

    # Train the model
    model.fit(X, y, batch_size=bs, nb_epoch=ep)

    # Generate timeseries
    x_seed = X[len(X) - 1]  # choose final in-sample data point to initialize model
    x_array = []
    x_array.append(x_seed)
    x = np.array(x_array)

    predicted = []
    for i in range(output_iterations):
        pred_parameters = model.predict_on_batch(x)[0]
        means = pred_parameters[:num_mixture_components * dim]
        sds = pred_parameters[(num_mixture_components * dim):(num_mixture_components * (dim + 1))]
        weights = pred_parameters[(num_mixture_components * (dim + 1)):]
        print(means)
        print(sds)
        print(weights)
        means = means.reshape(num_mixture_components, dim)
        sds = sds[:, np.newaxis]
        weights = weights[:, np.newaxis]
        pred = weights * np.random.normal(means, sds)
        pred = np.sum(pred, axis=0)
        predicted.append(pred)

    return predicted
inference_model.add(Reshape((1, hiddenStateSize3)))
inference_model.add(GRU(hiddenStateSize3, batch_input_shape=(1, 1, hiddenStateSize3),
                        stateful=True))
inference_model.add(Dense(hiddenLayerSize))
inference_model.add(Activation('relu'))
inference_model.add(Dense(len(char2id)))
inference_model.add(Activation('softmax'))
print(inference_model.summary())

inference_model.load_weights('cocktail_weights.h5')

recipes = []
for i in range(0, 100):
    inference_model.reset_states()
    startChar = np.zeros((1, 1, len(char2id)))
    startChar[0, 0, char2id['S']] = 1
    end = False
    sent = ""
    for i in range(0, max_sequence_length):
        nextCharProbs = inference_model.predict(startChar)
        nextCharProbs = np.asarray(nextCharProbs).astype('float64')
        nextCharProbs = nextCharProbs / nextCharProbs.sum()
        nextCharId = np.random.multinomial(1, nextCharProbs.squeeze(), 1).argmax()
        if id2char[nextCharId] == 'E':
            if not end:
                print("~~~~~")
def build_model(X_train, X_test, Y_train, noLSTM, train_labels): model = Sequential() model.reset_states() with codecs.open(rootFolder + "training.csv", 'a') as logfile: fieldnames = ['lstm1', 'lstm2', 'dense1', 'dense2', 'dense3'] writer = csv.DictWriter(logfile, fieldnames=fieldnames) writer.writerow({ 'lstm1': noLSTM[0], 'lstm2': noLSTM[1], 'dense1': noLSTM[2], 'dense2': noLSTM[3], 'dense3': noLSTM[4] }) # input if (noLSTM[2] != 0): model.add( Dense(X_train.shape[2], input_shape=(slidingWindowSize, X_train.shape[2]), activation='tanh', use_bias=True)) if (noLSTM[3] != 0): model.add(Dense(noLSTM[3], activation='tanh', use_bias=True)) if (noLSTM[4] != 0): model.add(Dense(noLSTM[4], activation='tanh', use_bias=True)) model.add(LSTM(noLSTM[0], activation='tanh', recurrent_activation='hard_sigmoid', \ use_bias=True, kernel_initializer='glorot_uniform', \ recurrent_initializer='orthogonal', \ unit_forget_bias=True, kernel_regularizer=None, \ recurrent_regularizer=None, \ bias_regularizer=None, activity_regularizer=None, \ kernel_constraint=None, recurrent_constraint=None, \ bias_constraint=None, dropout=0.0, recurrent_dropout=0.0, \ implementation=1, return_sequences=True, return_state=False, \ go_backwards=True, stateful=False, unroll=False)) model.add(Dropout(0.5)) if (noLSTM[1] != 0): model.add(LSTM(noLSTM[1], activation='tanh', recurrent_activation='hard_sigmoid', \ use_bias=True, kernel_initializer='glorot_uniform', \ recurrent_initializer='orthogonal', \ unit_forget_bias=True, kernel_regularizer=None, \ recurrent_regularizer=None, \ bias_regularizer=None, activity_regularizer=None, \ kernel_constraint=None, recurrent_constraint=None, \ bias_constraint=None, dropout=0.0, recurrent_dropout=0.0, \ implementation=1, return_sequences=True, return_state=False, \ go_backwards=True, stateful=False, unroll=False)) model.add(Dropout(0.5)) # dense if (noLSTM[3] != 0): model.add(Dense(noLSTM[3], activation='tanh', use_bias=True)) if (noLSTM[4] != 0): model.add(Dense(noLSTM[4], activation='tanh', use_bias=True)) model.add(Dropout(0.5)) model.add(Flatten()) model.add(Dense(3)) model.add(Activation('softmax')) # ['acc', 'loss', 'val_acc', 'val_loss'] opt = Adam(lr=0.0011, decay=0.001) model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy']) fnametmp = rootFolder + "plot_{}_{}_{}_{}_{}.png".format( "Model", noLSTM[0], noLSTM[1], noLSTM[2], noLSTM[3], noLSTM[4]) plot_model(model, to_file=fnametmp, show_shapes=True, show_layer_names=True, rankdir='TB') return early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=2, verbose=1, mode='auto') tn = TerminateOnNaN() reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, min_lr=1e-7, verbose=1) checkpoint_path = os.path.join( rootFolder, "weights.best_{}_{}_{}_{}_{}.hdf5".format("model", noLSTM[0], noLSTM[1], noLSTM[2], noLSTM[3], noLSTM[4])) checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max') csv_logger = CSVLogger(rootFolder + 'training.csv', append=True) early_stop = EarlyStopping(monitor='val_acc', patience=1, verbose=2, mode='auto') callback_fns = [early_stopping, tn, csv_logger, checkpoint, reduce_lr] history = model.fit(X_train, Y_train, batch_size=20, epochs=20, callbacks=callback_fns, validation_split=0.2, shuffle=True) fnametmp = rootFolder + "model_{}_{}_{}_{}_{}".format( noLSTM[0], noLSTM[1], noLSTM[2], noLSTM[3], noLSTM[4]) model.save_weights(fnametmp + '.h5') with open(fnametmp + '.json', 'w') as f: f.write(model.to_json()) fnametmp = 
"plot-{}-{}-{}.png".format("model-accuracy", noLSTM[0], noLSTM[1]) drawMe(yVal=history.history['acc'], xVal=history.history['val_acc'], title='model accuracy', xlabel='epoch', ylabel='accuracy', legend=['train', 'test'], save=True, fileName=fnametmp, show=False) fnametmp = "plot-{}-{}-{}.png".format("model-loss", noLSTM[0], noLSTM[1]) drawMe(yVal=history.history['loss'], xVal=history.history['val_loss'], title='model loss', xlabel='epoch', ylabel='loss', legend=['train', 'test'], save=True, fileName=fnametmp, show=False)
def get_model(): model = Sequential() if 'model' in locals(): model.reset_states() def identity_block(inputs, kernel_size, filters): filters1, filters2, filters3 = filters x = Conv1D(filters1, 1, use_bias=False, kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(inputs) x = BatchNormalization(momentum=batch_decay, epsilon=eps)(x) x = Activation('relu')(x) x = Conv1D(filters2, kernel_size, use_bias=False, kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay), padding='same')(x) x = BatchNormalization(momentum=batch_decay, epsilon=eps)(x) x = Activation('relu')(x) x = Conv1D(filters3, 1, use_bias=False, kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(x) x = BatchNormalization(momentum=batch_decay, epsilon=eps)(x) x = add([x, inputs]) x = Activation('relu')(x) return x def conv_block(inputs, kernel_size, filters, strides=2): filters1, filters2, filters3 = filters x = Conv1D(filters1, 1, use_bias=False, kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(inputs) x = BatchNormalization(momentum=batch_decay, epsilon=eps)(x) x = Activation('relu')(x) x = Conv1D(filters2, kernel_size, strides=strides, use_bias=False, kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay), padding='same')(x) x = BatchNormalization(momentum=batch_decay, epsilon=eps)(x) x = Activation('relu')(x) x = Conv1D(filters3, 1, use_bias=False, kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(x) x = BatchNormalization(momentum=batch_decay, epsilon=eps)(x) shortcut = Conv1D(filters3, 1, strides=strides, use_bias=False, kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(inputs) shortcut = BatchNormalization(momentum=batch_decay, epsilon=eps)(shortcut) x = add([x, shortcut]) x = Activation('relu')(x) return x inputs = Input(shape=(max_length, 2)) x = LSTM(256, return_sequences=True)(inputs) #x = LSTM(128, dropout=dropout_rate, recurrent_dropout=dropout_rate, return_sequences=True)(x) x = Activation('relu')(x) # x = Conv1D(filter1, conv_kernel, strides = 2, padding = 'valid', use_bias=False, kernel_initializer='he_normal', # kernel_regularizer=l2(weight_decay))(inputs) # x = BatchNormalization(momentum=batch_decay, epsilon=eps)(x) # x = Activation('relu')(x) # x = MaxPooling1D(3, strides = 2)(x) # # x = conv_block(x, block_kernel, [filter1,filter1,filter1*4]) # x = identity_block(x, block_kernel, [filter1, filter1, filter1*4]) # x = conv_block(x, block_kernel, [filter2,filter2,filter2*4]) # x = identity_block(x,3, [filter2, filter2, filter2*4]) # x = conv_block(x, block_kernel, [filter2, filter2, filter2 * 4]) # # x = SpatialDropout1D(rate = dropout_rate)(x) # x = Conv1D(filter2, 11, strides = 2, padding = 'valid', use_bias=False, kernel_initializer='he_normal', # kernel_regularizer=l2(weight_decay))(x) # x = Activation('relu')(x) x = GlobalAveragePooling1D()(x) ''' # dense to 100 >> final layer >> dense to num_classes x = Dense(100, kernel_regularizer = l2(weight_decay), bias_regularizer = l2(weight_decay), name ='features_hundred')(x) final_layer = Model(inputs= inputs, outputs = x) x = Dense(num_classes, kernel_regularizer=l2(weight_decay), bias_regularizer=l2(weight_decay), name= 'features')(x) x = Activation('softmax')(x) ''' x = Dense(num_classes, kernel_regularizer=l2(weight_decay), bias_regularizer=l2(weight_decay), name='features')(x) final_layer = Model(inputs=inputs, outputs=x) x = Activation('softmax')(x) model = Model(inputs=inputs, outputs=x) # optimizer sgd = optimizers.SGD(lr=learning_rate, 
momentum=momentum) adagrad = optimizers.Adagrad() adam = optimizers.Adam(lr=learning_rate) # compiler model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy']) return model, final_layer
def logistic_nondos(csvname): data = pd.read_csv(csvname) headers = [ 'SourceIp', 'DestIp', 'SourcePort', 'destPort', 'Seq_num', 'Trans_Id', 'funcCode', 'Refno', 'Register_data', 'Exeption_Code', 'Time_Stamp', 'Relative_Time', 'HH', 'LL', 'H', 'L', 'pump_speed', 'tank1_level', 'tank2_level', 'Alarm' ] '''tm = time.ctime()+'\\' path = 'FNN_results\\' os.mkdir(path+tm) os.mkdir(path+tm+'model\\') train = path+tm+"train.csv" test = path+tm+"test.csv" label_train_pre=path+tm+"label_train_pre.csv" label_train_act = path+tm+"label_train_act.csv" label_test_pre = path + tm+"label_test_pre.csv" label_test_act = path+tm+"label_test_act.csv" indices_test = path+tm+"indices_test.csv" indices_train = path +tm+ "indices_train.csv" model_path = path+tm+'model/''' data.columns = headers data = data.dropna() X_Label = [i for i in data.columns.tolist() if i not in 'Alarm'] X_train, y_train, X_test, y_test, train_indx, test_indx = pre_split( data, X_Label, 0.3) es = EarlyStopping(monitor='loss', patience=10) #X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=193) '''f = open(indices_test,'a') np.savetxt(f,test_indx) f.close()''' num_seq = X_train.shape[0] num_sample = [num_seq] #num_sample = [22,50,100,1000,10000,50000,100000,200000] for sample_size in num_sample: '''print(sample_size,file=open(train,"a")) print("Precision",',',"Recall",',',"F1",',',"Accuray",',',"loss",file=open(train,"a")) print(sample_size,file=open(test,"a")) print("Precision",',',"Recall",',',"F1",',',"Accuray",',',"loss",file=open(test,"a")) #print(sample_size,file=open(label_train,"a")) #print(sample_size,file=open(label_test,"a"))''' for kfold in range(1): print(sample_size) model = Sequential() indx = np.array(random.sample(range(X_train.shape[0]), sample_size)) print(X_train[indx].shape) model.reset_states() model.add(Dense(100, input_dim=X_train.shape[1], activation='relu')) #model.add(Dense(50,activation ='relu')) #model.add(Dense(20, activation='relu')) model.add(Dense(1, activation='sigmoid')) model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) model.fit(X_train[indx], y_train[indx], validation_split=0.1, batch_size=1000, shuffle=True, epochs=10000, callbacks=[es]) #model.save(model_path+str(sample_size)+'_'+str(kfold)+".h5") y_pre_train = model.predict_classes(X_train[indx]) y_pre_test = model.predict_classes(X_test) #print(X_test) #y_true_train = np.argmax(y_train[indx],axis=1) #y_true_test = np.argmax(y_test,axis=1) #print(y_true_test) ptrain, rtrain, ftrain = evaluate(y_pre_train, y_train) ptest, rtest, ftest = evaluate(y_pre_test, y_test) atrain = model.evaluate(X_train[indx], y_train[indx], steps=1) atest = model.evaluate(X_test, y_test, steps=1) acc_train = atrain[1] acc_test = atest[1] loss_train = atrain[0] loss_test = atest[0] #print(loss_train,file=open(train,"a")) #print(loss_test,file=open(test,"a")) labels = np.zeros((3, y_pre_test.shape[0])) labels[0, :] = test_indx + 2 labels[1, :] = y_test labels[2, :] = np.transpose(y_pre_test) np.savetxt('labels.csv', np.transpose(labels), delimiter=',') '''print(ptrain,',',rtrain,',',ftrain,',',acc_train,',',loss_train,file=open(train,"a"))
def runLstm(fName): # convert an array of values into a dataset matrix def create_dataset(dataset, look_back=1): dataX, dataY = [], [] for i in range(len(dataset) - look_back - 1): a = dataset[i:(i + look_back), 0] dataX.append(a) dataY.append(dataset[i + look_back, 0]) return np.array(dataX), np.array(dataY) # fix random seed for reproducibility np.random.seed(42) # load the dataset dataframe = pd.read_csv(fName, usecols=[1], engine='python') dataset = dataframe.values dataset = dataset.astype('float64') # normalize the dataset scaler = MinMaxScaler(feature_range=(0, 1)) dataset = scaler.fit_transform(dataset) # split into train and test sets train_size = int(len(dataset)) train = dataset[:, :] # reshape into X=t and Y=t+1 max_epochs = 200 look_back = 12 trainX, trainY = create_dataset(train, look_back) # reshape input to be [samples, time steps, features] trainX = np.reshape(trainX, (trainX.shape[0], trainX.shape[1], 1)) if trainX.shape[0] % 2 != 0: trainX = trainX[:-1] trainY = trainY[:-1] # create and fit the LSTM network min_batch_size = int(trainX.shape[0] * 0.01) batch_size = 0 cval = 1 while batch_size < min_batch_size: if trainX.shape[0] % (min_batch_size + cval) == 0: batch_size = min_batch_size + cval cval += 1 hist = History() # FOR EARLY STOPPING model = Sequential() model.add( LSTM(9, batch_input_shape=(batch_size, look_back, 1), stateful=True, return_sequences=True)) model.add(LSTM(6, stateful=True)) model.add(Dense(1)) model.compile(loss='mean_squared_error', optimizer='adam') model.fit(trainX, trainY, nb_epoch=1, batch_size=batch_size, verbose=0, shuffle=False, callbacks=[hist]) model.reset_states() print('BATCH_SIZE:', batch_size) hist_val = hist.history['loss'][0] for i in range(max_epochs): model.fit(trainX, trainY, nb_epoch=1, batch_size=batch_size, verbose=0, shuffle=False, callbacks=[hist]) model.reset_states() print('EPOCHS:', i + 1, end='\r') ''' if i > 3 and (np.abs(hist_val-hist.history['loss'][0])/hist_val < 0.001 or hist_val==0): print('TOTAL_EPOCHS:', i+1) break else: hist_val= hist.history['loss'][0] # print(hist_val) ''' # make predictions trainPredict = model.predict(trainX, batch_size=batch_size) # invert predictions trainPredict = scaler.inverse_transform(trainPredict) trainY = scaler.inverse_transform([trainY]) # calculate root mean squared error trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0])) # print('Train Score: %.2f RMSE' % (trainScore)) # shift train predictions for plotting trainPredictPlot = np.empty_like(dataset) trainPredictPlot[:, :] = np.nan trainPredictPlot[look_back:len(trainPredict) + look_back, :] = trainPredict return trainPredictPlot
def logistic(csvname): data = pd.read_csv(csvname) headers = [ 'SourceIp', 'DestIp', 'SourcePort', 'destPort', 'Seq_num', 'Trans_Id', 'funcCode', 'Refno', 'Register_data', 'Exeption_Code', 'Time_Stamp', 'Relative_Time', 'eth_src', 'eth_dst', 'Alarm' ] data.columns = headers del (data['eth_src']) del (data['eth_dst']) X_Label = [i for i in data.columns.tolist() if i not in 'Alarm'] X, y = pre_split(data, X_Label) Train_Error = [] Test_Error = [] es = EarlyStopping(monitor='loss', min_delta=0.00005, patience=5) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42) num_seq = X_train.shape[0] #num_sample = [10, 100, 500, 1000, 5000, 10000, 30000, 50000, 80000, 100000, 150000, 200000, num_seq] num_sample = [num_seq] for sample_size in num_sample: #print(sample_size,file=open("10fold/Log_10_fold_train.txt","a")) #print("Precision",',',"Recall",',',"F1",',',"Accuray",',',"loss",file=open("10fold/Log_10_fold_train.txt","a")) #print(sample_size,file=open("10fold/Log_10_fold_test.txt","a")) #print("Precision",',',"Recall",',',"F1",',',"Accuray",',',"loss",file=open("10fold/Log_10_fold_test.txt","a")) for kfold in range(1): model = Sequential() model.reset_states() model.add(Dense(y.shape[1], input_dim=12, activation='sigmoid')) model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) model.fit(X_train[0:sample_size, :], y_train[0:sample_size], batch_size=256, epochs=10000, shuffle=True, callbacks=[es]) y_pre_train = model.predict_classes(X_train[0:sample_size, :]) y_pre_test = model.predict_classes(X_test) y_pre_train = y_pre_train.reshape(sample_size) ftrain = f1_score(y_true=y_train[0:sample_size], y_pred=y_pre_train) ptrain = precision_score(y_true=y_train[0:sample_size], y_pred=y_pre_train) rtrain = recall_score(y_true=y_train[0:sample_size], y_pred=y_pre_train) ftest = f1_score(y_true=y_test, y_pred=y_pre_test) ptest = precision_score(y_true=y_test, y_pred=y_pre_test) rtest = recall_score(y_true=y_test, y_pred=y_pre_test) atrain = model.evaluate(X_train[0:sample_size, :], y_train[0:sample_size]) atest = model.evaluate(X_test, y_test) acc_train = atrain[1] acc_test = atest[1] loss_train = atrain[0] loss_test = atest[0]
def build_LSTM(trainX, trainY, testX, testY): print 'baseline: ', mean_squared_error(testY, testX[:, -1]) batch_size = 25 model = Sequential() model.add( LSTM(20, batch_input_shape=(batch_size, LOOK_BACK, 1), return_sequences=True)) # model.add(BatchNormalization()) model.add(layers.core.Dropout(0.2)) model.add(LSTM(5, return_sequences=False)) model.add(layers.core.Dropout(0.2)) # model.add(Dense(5)) # model.add(layers.core.Dropout(0.2)) # model.add(BatchNormalization()) model.add(Dense(1)) lr = 0.005 decay = 0.95 nb_epoch = 100 adam = optimizers.adam(lr=lr) # sgd = optimizers.SGD(lr=0.005, clipnorm=0.1) model.compile(loss='mean_squared_error', optimizer=adam) print "TrainX: ", trainX.shape print "TrainY: ", trainY.shape print "TestX: ", testX.shape print "TestY: ", testY.shape for i in range(nb_epoch): rd = random.random() if rd < 0.95: # adam.__setattr__('lr', lr) adam.lr.set_value(lr) else: # adam.__setattr__('lr', lr*5) adam.lr.set_value(lr * 2) print 'i: ', i, ' lr: ', adam.lr.get_value( ) #adam.__getattribute__('lr') # adam.lr.get_value() model.fit(trainX, trainY, nb_epoch=1, batch_size=batch_size, verbose=1, shuffle=True, validation_split=0.15) #validation_data=(testX, testY)) # model.reset_states() if i % 5 == 0: testPredict = model.predict(testX, batch_size=batch_size, verbose=1) print 'lstm_i: ', mean_squared_error(testY, testPredict) lr *= decay # for i in range(100): # model.fit(trainX, trainY, nb_epoch=1, batch_size=batch_size, verbose=2, shuffle=False) # model.reset_states() # make predictions trainPredict = model.predict(trainX, batch_size=batch_size, verbose=1) model.reset_states() testPredict = model.predict(testX, batch_size=batch_size, verbose=1) print 'baseline: ', mean_squared_error(testY, testX[:, -1]) print 'lstm: ', mean_squared_error(testY, testPredict) print 'avg(abs(.)): ', np.average(np.abs(testY))
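The loop above decays the learning rate by mutating adam.lr directly (plus an occasional random bump). As a hedged alternative sketch, not part of the original code, Keras' LearningRateScheduler callback can apply a comparable per-epoch exponential decay without the manual set_value calls:

from keras.callbacks import LearningRateScheduler

def lr_schedule(epoch):
    # 0.005 * 0.95**epoch mirrors the lr/decay values used above (without the random bump)
    return 0.005 * (0.95 ** epoch)

# Would replace the manual per-epoch loop inside build_LSTM:
# model.fit(trainX, trainY, nb_epoch=nb_epoch, batch_size=batch_size, verbose=1,
#           shuffle=True, validation_split=0.15, callbacks=[LearningRateScheduler(lr_schedule)])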
def do_model(all_data, steps, dates): _steps = steps # trim = 100 # all_data = all_data[:trim] # dates = dates[:trim] print("steps:", _steps) features = all_data[:-_steps] labels = all_data[_steps:, -1:] X_train = features Y_train = labels optimiser = 'adam' hidden_neurons = 332 loss_function = 'mse' dropout = 0.0923 inner_hidden_neurons = 269 dropout_inner = 0.2269 batch_size = 1 X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1])) print("X train shape:\t", X_train.shape) # print("Y train shape:\t", Y_train.shape) # print("Y test shape:\t", Y_test.shape) # print("Steps:\t", _steps) in_neurons = X_train.shape[2] out_neurons = 1 model = Sequential() if 'gpu' in theano.config.device: gpu_cpu = 'gpu' else: gpu_cpu = 'cpu' model.add( LSTM(output_dim=hidden_neurons, input_dim=in_neurons, batch_input_shape=(1, 1, in_neurons), return_sequences=True, init='uniform', consume_less=gpu_cpu, stateful=True)) model.add(Dropout(dropout)) dense_input = inner_hidden_neurons model.add( LSTM(output_dim=dense_input, input_dim=hidden_neurons, return_sequences=False, consume_less=gpu_cpu, stateful=True)) model.add(Dropout(dropout_inner)) model.add(Activation('relu')) model.add(Dense(output_dim=out_neurons, input_dim=dense_input)) model.add(Activation('relu')) model.compile(loss=loss_function, optimizer=optimiser) # run through all data up to 23 April, 2013 progress = pyprind.ProgBar(len(X_train), width=50, stream=1) mean_tr_loss = [] seq = 0 inputs = zip(X_train, Y_train) for idx, tup in enumerate(inputs): x_chunk, y_chunk = tup tr_loss = model.train_on_batch(np.array([x_chunk]), y_chunk) mean_tr_loss.append(tr_loss) seq += 1 if seq % sequence_length == 0: model.reset_states() progress.update() # for x_chunk, y_chunk in chunks(X_train, Y_train, batch_size): # # for # # we need to reset states when we have an error value # if last_x is None: # # last_x = x_chunk # # tr_loss = model.train_on_batch(x_chunk, y_chunk) # # mean_tr_loss.append(tr_loss) # seq += 1 # if seq % sequence_length == 0: # model.reset_states() # progress.update() print("\nTraining Loss: {}".format(np.mean(mean_tr_loss))) return model
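A hedged sketch, not from the original code, of one-step-ahead prediction with the stateful model returned by do_model(); it reuses all_data, steps and sequence_length from the surrounding script and mirrors the reset_states() cadence of the training loop:

import numpy as np

model = do_model(all_data, steps, dates)
model.reset_states()

eval_X = all_data[:-steps].reshape(-1, 1, all_data.shape[1])
predictions = []
for idx, x_chunk in enumerate(eval_X):
    # batch size is fixed to 1 by batch_input_shape=(1, 1, in_neurons)
    predictions.append(model.predict_on_batch(x_chunk[np.newaxis, :, :])[0, 0])
    if (idx + 1) % sequence_length == 0:
        model.reset_states()  # clear carried state at sequence boundaries, as in training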
def build_model(self,predict,custom_batch_size=None): conf = self.conf model_conf = conf['model'] rnn_size = model_conf['rnn_size'] rnn_type = model_conf['rnn_type'] optimizer = model_conf['optimizer'] lr = model_conf['lr'] clipnorm = model_conf['clipnorm'] regularization = model_conf['regularization'] if optimizer == 'sgd': optimizer_class = SGD elif optimizer == 'adam': optimizer_class = Adam elif optimizer == 'rmsprop': optimizer_class = RMSprop elif optimizer == 'nadam': optimizer_class = Nadam else: optimizer = optimizer if lr is not None or clipnorm is not None: optimizer = optimizer_class(lr = lr,clipnorm=clipnorm) loss_fn = conf['data']['target'].loss#model_conf['loss'] dropout_prob = model_conf['dropout_prob'] length = model_conf['length'] pred_length = model_conf['pred_length'] skip = model_conf['skip'] stateful = model_conf['stateful'] return_sequences = model_conf['return_sequences'] output_activation = conf['data']['target'].activation#model_conf['output_activation'] num_signals = conf['data']['num_signals'] batch_size = self.conf['training']['batch_size'] if predict: batch_size = self.conf['model']['pred_batch_size'] #so we can predict with one time point at a time! if return_sequences: length =pred_length else: length = 1 if custom_batch_size is not None: batch_size = custom_batch_size if rnn_type == 'LSTM': rnn_model = LSTM elif rnn_type == 'SimpleRNN': rnn_model =SimpleRNN else: print('Unkown Model Type, exiting.') exit(1) batch_input_shape=(batch_size,length, num_signals) model = Sequential() # model.add(TimeDistributed(Dense(num_signals,bias=True),batch_input_shape=batch_input_shape)) for _ in range(model_conf['rnn_layers']): model.add(rnn_model(rnn_size, return_sequences=return_sequences,batch_input_shape=batch_input_shape, stateful=stateful,W_regularizer=l2(regularization),U_regularizer=l2(regularization), b_regularizer=l2(regularization),dropout_W=dropout_prob,dropout_U=dropout_prob)) model.add(Dropout(dropout_prob)) if return_sequences: model.add(TimeDistributed(Dense(1,activation=output_activation))) else: model.add(Dense(1,activation=output_activation)) model.compile(loss=loss_fn, optimizer=optimizer) model.reset_states() #model.compile(loss='mean_squared_error', optimizer='sgd') #for numerical output return model
simple_stats = Stats() simple_model=Sequential([ Embedding(max_words + 1, output_dim=vector_size, input_length=input_size, batch_input_shape=[batch_size, None]), SimpleRNN(state_size, return_sequences=True, stateful=True, activation=tf.nn.relu), Flatten(), Dense(64, activation=tf.nn.sigmoid), Dense(1, activation=tf.nn.sigmoid) ]) simple_model.compile(loss=losses.binary_crossentropy, optimizer='adam', metrics=['accuracy']) # print(simple_model.summary()) for i in range(epochs): simple_model.fit(X_train, Y, epochs=1, batch_size=batch_size, validation_data=(X_eval, Yv), callbacks=[simple_stats]) simple_model.reset_states() print(simple_model.summary()) # ************************************************** # # EVALUATION OF THE MODELS # # ************************************************** # print("\n* Evaluating accuracy of the Simple RNN model") simple_accuracy = simple_model.evaluate(X_test, Yt, batch_size=batch_size, verbose=1, callbacks=[simple_stats]) print("Accuracy: {}%".format(round(simple_accuracy[1]*100, 2))) # ************************************************** # # Saving the model # # ************************************************** #
training_model.add(Activation('softmax')) optimizer = RMSprop(lr=0.01) training_model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) ### Batch train training_model epochs = 10 for i in range(epochs): training_model.fit(x=x, y=y, batch_size=training_batch_size, epochs=1, shuffle=False) training_model.reset_states() ### Predict single characters at a time with a stateful model prediction_batch_size = 1 prediction_model = Sequential() prediction_model.add( GRU(128, batch_input_shape=(prediction_batch_size, x.shape[1], x.shape[2]), return_sequences=True, stateful=True)) prediction_model.add(Activation('relu')) prediction_model.add(GRU(128, return_sequences=True, stateful=True)) prediction_model.add(Activation('relu')) prediction_model.add(Dense(len(chars))) prediction_model.add(Activation('softmax')) prediction_model.set_weights(training_model.get_weights())
def main(): parser = argparse.ArgumentParser(description='Process some integers.') parser.add_argument('--dataset', type=str, help='Dataset file', required=True) parser.add_argument('--splitfile', type=str, help='Split file', required=True) parser.add_argument('--hiddenunits', type=int, help='Number of LSTM hidden units.', default=200, required=False) parser.add_argument('--batchsize', type=int, help='Number of sequences to process in a batch.', default=5, required=False) parser.add_argument('--timewindow', type=int, help='Number of timesteps to process in a batch.', default=100, required=False) parser.add_argument('--epochs', type=int, help='Number of epochs.', default=50, required=False) args = parser.parse_args() dataset = args.dataset split_file = args.splitfile hidden_units = args.hiddenunits batch_size = args.batchsize time_window = args.timewindow epochs = args.epochs model_file = dataset + '.model_weights' history_file = dataset + '.history' preds_file = dataset + '.preds' overall_loss = [0.0] preds = [] history = [] # load dataset training_seqs, testing_seqs, num_skills = load_dataset(dataset, split_file) print "Training Sequences: %d" % len(training_seqs) print "Testing Sequences: %d" % len(testing_seqs) print "Number of skills: %d" % num_skills # Our loss function # The model gives predictions for all skills so we need to get the # prediction for the skill at time t. We do that by taking the column-wise # dot product between the predictions at each time slice and a # one-hot encoding of the skill at time t. # y_true: (nsamples x nsteps x nskills+1) # y_pred: (nsamples x nsteps x nskills) def loss_function(y_true, y_pred): skill = y_true[:,:,0:num_skills] obs = y_true[:,:,num_skills] rel_pred = Th.sum(y_pred * skill, axis=2) # keras implementation does a mean on the last dimension (axis=-1) which # it assumes is a singleton dimension. But in our context that would # be wrong. return K.binary_crossentropy(rel_pred, obs) # build model model = Sequential() # ignore padding model.add(Masking(-1.0, batch_input_shape=(batch_size, time_window, num_skills*2))) # lstm configured to keep states between batches model.add(LSTM(input_dim = num_skills*2, output_dim = hidden_units, return_sequences=True, batch_input_shape=(batch_size, time_window, num_skills*2), stateful = True )) # readout layer. TimeDistributedDense uses the same weights for all # time steps. model.add(TimeDistributedDense(input_dim = hidden_units, output_dim = num_skills, activation='sigmoid')) # optimize with rmsprop which dynamically adapts the learning # rate of each weight. 
model.compile(loss=loss_function, optimizer='rmsprop',class_mode="binary") # training function def trainer(X, Y): overall_loss[0] += model.train_on_batch(X, Y)[0] # prediction def predictor(X, Y): batch_activations = model.predict_on_batch(X) skill = Y[:,:,0:num_skills] obs = Y[:,:,num_skills] y_pred = np.squeeze(np.array(batch_activations)) rel_pred = np.sum(y_pred * skill, axis=2) for b in xrange(0, X.shape[0]): for t in xrange(0, X.shape[1]): if X[b, t, 0] == -1.0: continue preds.append((rel_pred[b][t], obs[b][t])) # call when prediction batch is finished # resets LSTM state because we are done with all sequences in the batch def finished_prediction_batch(percent_done): model.reset_states() # similiar to the above def finished_batch(percent_done): print "(%4.3f %%) %f" % (percent_done, overall_loss[0]) model.reset_states() # run the model for e in xrange(0, epochs): model.reset_states() # train run_func(training_seqs, num_skills, trainer, batch_size, time_window, finished_batch) model.reset_states() # test run_func(testing_seqs, num_skills, predictor, batch_size, time_window, finished_prediction_batch) # compute AUC auc = roc_auc_score([p[1] for p in preds], [p[0] for p in preds]) # log history.append((overall_loss[0], auc)) # save model model.save_weights(model_file, overwrite=True) print "==== Epoch: %d, Test AUC: %f" % (e, auc) # reset loss overall_loss[0] = 0.0 # save predictions with open(preds_file, 'w') as f: f.write('was_heldout\tprob_recall\tstudent_recalled\n') for pred in preds: f.write('1\t%f\t%d\n' % (pred[0], pred[1])) with open(history_file, 'w') as f: for h in history: f.write('\t'.join([str(he) for he in h])) f.write('\n') # clear preds preds = []
def test_recurrent_convolutional(): nb_row = 3 nb_col = 3 nb_filter = 5 nb_samples = 2 input_channel = 2 input_nb_row = 5 input_nb_col = 5 sequence_len = 2 for dim_ordering in ['th', 'tf']: if dim_ordering == 'th': input = np.random.rand(nb_samples, sequence_len, input_channel, input_nb_row, input_nb_col) else: # tf input = np.random.rand(nb_samples, sequence_len, input_nb_row, input_nb_col, input_channel) for return_sequences in [True, False]: # test for ouptput shape: output = layer_test(convolutional_recurrent.ConvLSTM2D, kwargs={'dim_ordering': dim_ordering, 'return_sequences': return_sequences, 'nb_filter': nb_filter, 'nb_row': nb_row, 'nb_col': nb_col, 'border_mode': "same"}, input_shape=input.shape) output_shape = [nb_samples, input_nb_row, input_nb_col] if dim_ordering == 'th': output_shape.insert(1, nb_filter) else: output_shape.insert(3, nb_filter) if return_sequences: output_shape.insert(1, sequence_len) assert output.shape == tuple(output_shape) # No need to check statefulness for both if dim_ordering == 'th' or return_sequences: continue # Tests for statefulness model = Sequential() kwargs = {'dim_ordering': dim_ordering, 'return_sequences': return_sequences, 'nb_filter': nb_filter, 'nb_row': nb_row, 'nb_col': nb_col, 'stateful': True, 'batch_input_shape': input.shape, 'border_mode': "same"} layer = convolutional_recurrent.ConvLSTM2D(**kwargs) model.add(layer) model.compile(optimizer='sgd', loss='mse') out1 = model.predict(np.ones_like(input)) assert(out1.shape == tuple(output_shape)) # train once so that the states change model.train_on_batch(np.ones_like(input), np.ones_like(output)) out2 = model.predict(np.ones_like(input)) # if the state is not reset, output should be different assert(out1.max() != out2.max()) # check that output changes after states are reset # (even though the model itself didn't change) layer.reset_states() out3 = model.predict(np.ones_like(input)) assert(out2.max() != out3.max()) # check that container-level reset_states() works model.reset_states() out4 = model.predict(np.ones_like(input)) assert_allclose(out3, out4, atol=1e-5) # check that the call to `predict` updated the states out5 = model.predict(np.ones_like(input)) assert(out4.max() != out5.max()) # check regularizers kwargs = {'dim_ordering': dim_ordering, 'return_sequences': return_sequences, 'nb_filter': nb_filter, 'nb_row': nb_row, 'nb_col': nb_col, 'stateful': True, 'batch_input_shape': input.shape, 'W_regularizer': regularizers.WeightRegularizer(l1=0.01), 'U_regularizer': regularizers.WeightRegularizer(l1=0.01), 'b_regularizer': 'l2', 'border_mode': "same"} layer = convolutional_recurrent.ConvLSTM2D(**kwargs) layer.set_input(K.variable(np.ones(input.shape)), shape=input.shape) K.eval(layer.output) # check dropout layer_test(convolutional_recurrent.ConvLSTM2D, kwargs={'dim_ordering': dim_ordering, 'return_sequences': return_sequences, 'nb_filter': nb_filter, 'nb_row': nb_row, 'nb_col': nb_col, 'border_mode': "same", 'dropout_W': 0.1, 'dropout_U': 0.1}, input_shape=input.shape)
#Save weights name = 'neumonia_dataset_interson_keras_alldata_{0}_weights_cnn_{0}.h5'.format(number_db) print(name) model.save_weights(name,overwrite=True) #l = h5py.File("loss_history_{0}.hdf5".format(number_db), "w") #dset = f.create_dataset("loss_history_{0}".format(number_db), (100,), dtype='i') #A way to open a model with weights in the same architecture ''' json_string = model.to_json() open('my_model_architecture.json', 'w').write(json_string) model.save_weights('my_model_weights.h5') ''' import cPickle f = open('cnn_loss_{0}.pkl'.format(number_db),'wb') cPickle.dump(history.losses,f,protocol=cPickle.HIGHEST_PROTOCOL) f.close() h = open('cnn_metrics_{0}.pkl'.format(number_db),'wb') p = [sensitivity, specificity, F1, mcc] cPickle.dump(p, h,protocol=cPickle.HIGHEST_PROTOCOL) h.close() model.reset_states() #import matplotlib.pylab as plt #plt.plot(history.losses,'bo') #plt.xlabel('Iteration') #plt.ylabel('Binary Cross Entropy') #plt.show()
def lstm_model( data, hidden_layer_neurons, epochs, batch_size=1, feature_dimensions=1, verbose=False): """Build an LSTM model. Args: data: Data frame of X, Y values hidden_layer_neurons: Number of neurons per layers epochs: Number of iterations for learning batch_size feature_dimensions: Dimension of features (Number of rows per feature) Returns: model: Graph of LSTM model """ # Initialize key variables start = time.time() # Process the data for fitting x_values, y_values = data[:, 0: -1], data[:, -1] x_shaped = x_values.reshape(x_values.shape[0], 1, x_values.shape[1]) # Let's do some learning! model = Sequential() ''' The Long Short-Term Memory network (LSTM) is a type of Recurrent Neural Network (RNN). A benefit of this type of network is that it can learn and remember over long sequences and does not rely on a pre-specified window lagged observation as input. In Keras, this is referred to as being "stateful", and involves setting the "stateful" argument to "True" when defining an LSTM layer. By default, an LSTM layer in Keras maintains state between data within one batch. A batch of data is a fixed-sized number of rows from the training dataset that defines how many patterns (sequences) to process before updating the weights of the network. A state is: Where am I now inside a sequence? Which time step is it? How is this particular sequence behaving since its beginning up to now? A weight is: What do I know about the general behavior of all sequences I've seen so far? State in the LSTM layer between batches is cleared by default. This is undesirable therefore we must make the LSTM stateful. This gives us fine-grained control over when state of the LSTM layer is cleared, by calling the reset_states() function during the model.fit() method. LSTM networks can be stacked in Keras in the same way that other layer types can be stacked. One addition to the configuration that is required is that an LSTM layer prior to each subsequent LSTM layer must return the sequence. This can be done by setting the return_sequences parameter on the layer to True. batch_size denotes the subset size of your training sample (e.g. 100 out of 1000) which is going to be used in order to train the network during its learning process. Each batch trains network in a successive order, taking into account the updated weights coming from the appliance of the previous batch. return_sequence indicates if a recurrent layer of the network should return its entire output sequence (i.e. a sequence of vectors of specific dimension) to the next layer of the network, or just its last only output which is a single vector of the same dimension. This value can be useful for networks conforming with an RNN architecture. batch_input_shape defines that the sequential classification of the neural network can accept input data of the defined only batch size, restricting in that way the creation of any variable dimension vector. It is widely used in stacked LSTM networks. 
It is a tuple of (batch_size, timesteps, data_dimension) ''' timesteps = x_shaped.shape[1] data_dimension = x_shaped.shape[2] # Add layers to the model model.add( LSTM( units=hidden_layer_neurons, batch_input_shape=(batch_size, timesteps, data_dimension), return_sequences=True, stateful=True ) ) model.add(Dropout(0.2)) model.add( LSTM( units=hidden_layer_neurons, batch_input_shape=(batch_size, timesteps, data_dimension), return_sequences=False, stateful=True ) ) model.add(Dropout(0.2)) model.add( Dense( units=feature_dimensions ) ) # model.add(Activation('linear')) ''' Once the network is specified, it must be compiled into an efficient symbolic representation using a backend mathematical library, such as TensorFlow. In compiling the network, we must specify a loss function and optimization algorithm. We will use "mean_squared_error" or "mse" as the loss function as it closely matches RMSE that we will are interested in, and the efficient ADAM optimization algorithm. ''' model.compile(loss='mse', optimizer='adam', metrics=['accuracy']) ''' Once the model is compiled, the network can be fit to the training data. Because the network is stateful, we must control when the internal state is reset. Therefore, we must manually manage the training process one epoch at a time across the desired number of epochs. By default, the samples within an epoch are shuffled prior to being exposed to the network. Again, this is undesirable for the LSTM because we want the network to build up state as it learns across the sequence of observations. We can disable the shuffling of samples by setting "shuffle" to "False". ''' for _ in range(epochs): model.fit( x_shaped, y_values, batch_size=batch_size, shuffle=False, epochs=1, verbose=verbose, validation_split=0.05) ''' When the fit process reaches the total length of the samples, model.reset_states() is called to reset the internal state at the end of the training epoch, ready for the next training iteration. This iteration will start training from the beginning of the dataset therefore state will need to be reset as the previous state would only be relevant to the prior epoch iteration. ''' model.reset_states() print('\n> Training Time: {:20.2f}'.format(time.time() - start)) return model
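A minimal usage sketch for the stateful pattern that lstm_model() and its docstring describe; the placeholder array and hyperparameter values below are assumptions introduced purely for illustration:

import numpy as np

lagged = np.random.rand(120, 13).astype('float32')  # columns 0..11 are lagged inputs, column 12 is the target
model = lstm_model(lagged, hidden_layer_neurons=50, epochs=5, batch_size=1)

# The layers were built with batch_input_shape=(batch_size, 1, 12), so prediction must use the
# same batch size; reset the state carried over from training before walking a new sequence.
model.reset_states()
x_new = lagged[:, 0:-1].reshape(lagged.shape[0], 1, lagged.shape[1] - 1)
y_hat = model.predict(x_new, batch_size=1)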
model.summary() model.compile(loss='mse', optimizer='adam', metrics=['mse']) from keras.callbacks import EarlyStopping early_stopping = EarlyStopping(monitor='loss', patience=100, mode='auto') th_hist = keras.callbacks.TensorBoard(log_dir='./graph', histogram_freq = 0, write_graph=True, write_images=True) # shuffle=False (carry the previously trained state over without shuffling) # model.reset_states() (always used with stateful training; the learned weights themselves are unchanged) num_epochs = 100 for epoch_idx in range(num_epochs): print('epochs : ' + str(epoch_idx)) history = model.fit(x_train, y_train, epochs=1, batch_size=batch_size, callbacks=[early_stopping, th_hist], verbose=2, shuffle=False, validation_data=(x_test,y_test)) model.reset_states() # in stateful training, call this after every fit() history_mse.append(history.history['mean_squared_error']) history_val_mse.append(history.history['val_mean_squared_error']) mse, _ = model.evaluate(x_train, y_train, batch_size=batch_size) print("mse : ", mse) model.reset_states() y_predict = model.predict(x_test, batch_size=batch_size) print(y_predict[0:5]) # print the first 5 predictions # Compute RMSE (lower is better) from sklearn.metrics import mean_squared_error def RMSE(y_test, y_predict): # compares y_test and y_predict (actual vs. predicted values) return np.sqrt(mean_squared_error(y_test, y_predict)) # square root of the mean squared difference print("RMSE : ", RMSE(y_test, y_predict))
def test_convolutional_recurrent(): num_row = 3 num_col = 3 filters = 5 num_samples = 2 input_channel = 2 input_num_row = 5 input_num_col = 5 sequence_len = 2 for data_format in ['channels_first', 'channels_last']: if data_format == 'channels_first': inputs = np.random.rand(num_samples, sequence_len, input_channel, input_num_row, input_num_col) else: inputs = np.random.rand(num_samples, sequence_len, input_num_row, input_num_col, input_channel) for return_sequences in [True, False]: # test for output shape: output = layer_test(convolutional_recurrent.ConvLSTM2D, kwargs={'data_format': data_format, 'return_sequences': return_sequences, 'filters': filters, 'kernel_size': (num_row, num_col), 'padding': 'valid'}, input_shape=inputs.shape) # No need to check following tests for both data formats if data_format == 'channels_first' or return_sequences: continue # Tests for statefulness model = Sequential() kwargs = {'data_format': data_format, 'return_sequences': return_sequences, 'filters': filters, 'kernel_size': (num_row, num_col), 'stateful': True, 'batch_input_shape': inputs.shape, 'padding': 'same'} layer = convolutional_recurrent.ConvLSTM2D(**kwargs) model.add(layer) model.compile(optimizer='sgd', loss='mse') out1 = model.predict(np.ones_like(inputs)) # train once so that the states change model.train_on_batch(np.ones_like(inputs), np.random.random(out1.shape)) out2 = model.predict(np.ones_like(inputs)) # if the state is not reset, output should be different assert(out1.max() != out2.max()) # check that output changes after states are reset # (even though the model itself didn't change) layer.reset_states() out3 = model.predict(np.ones_like(inputs)) assert(out2.max() != out3.max()) # check that container-level reset_states() works model.reset_states() out4 = model.predict(np.ones_like(inputs)) assert_allclose(out3, out4, atol=1e-5) # check that the call to `predict` updated the states out5 = model.predict(np.ones_like(inputs)) assert(out4.max() != out5.max()) # check regularizers kwargs = {'data_format': data_format, 'return_sequences': return_sequences, 'kernel_size': (num_row, num_col), 'stateful': True, 'filters': filters, 'batch_input_shape': inputs.shape, 'kernel_regularizer': regularizers.L1L2(l1=0.01), 'recurrent_regularizer': regularizers.L1L2(l1=0.01), 'bias_regularizer': 'l2', 'activity_regularizer': 'l2', 'kernel_constraint': 'max_norm', 'recurrent_constraint': 'max_norm', 'bias_constraint': 'max_norm', 'padding': 'same'} layer = convolutional_recurrent.ConvLSTM2D(**kwargs) layer.build(inputs.shape) assert len(layer.losses) == 3 assert layer.activity_regularizer output = layer(K.variable(np.ones(inputs.shape))) assert len(layer.losses) == 4 K.eval(output) # check dropout layer_test(convolutional_recurrent.ConvLSTM2D, kwargs={'data_format': data_format, 'return_sequences': return_sequences, 'filters': filters, 'kernel_size': (num_row, num_col), 'padding': 'same', 'dropout': 0.1, 'recurrent_dropout': 0.1}, input_shape=inputs.shape) # check state initialization layer = convolutional_recurrent.ConvLSTM2D(filters=filters, kernel_size=(num_row, num_col), data_format=data_format, return_sequences=return_sequences) layer.build(inputs.shape) x = Input(batch_shape=inputs.shape) initial_state = layer.get_initial_state(x) y = layer(x, initial_state=initial_state) model = Model(x, y) assert model.predict(inputs).shape == layer.compute_output_shape(inputs.shape)
model3.compile(loss='mean_squared_error', optimizer='adam') # Try SGD, adam, adagrad and compare!!! early_stop = EarlyStopping(monitor='loss', patience=1, verbose=1) # model fitting for i in range(10): print("model1 fitting : adagrad & ", i, " fitting") model1.summary() model1.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2, callbacks=[early_stop]) model1.reset_states() print("model2 fitting : SGD & ", i, " fitting") model2.summary() model2.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2, callbacks=[early_stop]) model2.reset_states() print("model3 fitting : adam & ", i, " fitting") model3.summary() model3.fit(trainX, trainY, epochs=100, batch_size=1,
class TimeSeriesPredictorLSTM(TimeSeriesPredictorBase): """TimeSeriesPredictorLSTM class LSTM class for Time Series Predictor Models """ def __init__(self, model_prefix='lstm_model'): TimeSeriesPredictorBase.__init__(self) self.is_lstm = True self.is_stateful = False self.model_prefix = model_prefix self.params['stacked_units'] = [4] self.params['op_units'] = 1 def model(self): self.model = Sequential() # Stacked if self.is_stateful == False: if self.use_time_steps == False: self.model.add( LSTM(self.params['stacked_units'][0], input_shape=(1, self.params['look_back']))) else: self.model.add( LSTM(self.params['stacked_units'][0], input_shape=(self.params['look_back'], 1))) else: stack = len(self.params['stacked_units']) if stack == 1: self.model.add( LSTM(self.params['stacked_units'][0], batch_input_shape=(self.params['batch_size'], self.params['look_back'], 1), stateful=True)) else: self.model.add( LSTM(self.params['stacked_units'][0], batch_input_shape=(self.params['batch_size'], self.params['look_back'], 1), stateful=True, return_sequences=True)) for i in range(1, stack): self.model.add( LSTM(self.params['stacked_units'][i], batch_input_shape=(self.params['batch_size'], self.params['look_back'], 1), stateful=True)) # Output self.model.add(Dense(self.params['op_units'])) # Compile self.model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy']) def train(self, train_x, train_y, test_x, test_y): if self.is_stateful == False: return self.model.fit(train_x, train_y, epochs=self.params['epochs'], validation_data=(test_x, test_y), batch_size=self.params['batch_size'], verbose=self.verbose, shuffle=False) else: history = None for i in range(self.params['epochs']): hist = self.model.fit(train_x, train_y, epochs=1, validation_data=(test_x, test_y), batch_size=self.params['batch_size'], verbose=self.verbose, shuffle=False) # Save history manually if history == None: history = hist else: for key in history.history: history.history[key].append(hist.history[key][0]) # reset state self.model.reset_states() return history def evaluate(self, x, y): # Model performance return self.model.evaluate(x, y, batch_size=self.params['batch_size'], verbose=self.verbose) def predict(self, data): return self.model.predict(data, batch_size=self.params['batch_size'])
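A hedged usage sketch for TimeSeriesPredictorLSTM above; it assumes the base class initialises params, use_time_steps and verbose as the methods imply, and the windowed arrays below are placeholders introduced only for illustration:

import numpy as np

look_back = 12
train_x = np.random.rand(100, look_back, 1).astype('float32')
train_y = np.random.rand(100, 1).astype('float32')
test_x = np.random.rand(20, look_back, 1).astype('float32')
test_y = np.random.rand(20, 1).astype('float32')

predictor = TimeSeriesPredictorLSTM(model_prefix='stateful_lstm')
predictor.is_stateful = True
predictor.params['stacked_units'] = [8, 4]   # two stacked LSTM layers
predictor.params['look_back'] = look_back
predictor.params['batch_size'] = 1
predictor.params['epochs'] = 20

predictor.model()                            # build and compile the Keras graph
history = predictor.train(train_x, train_y, test_x, test_y)   # resets state after every epoch
test_score = predictor.evaluate(test_x, test_y)
forecast = predictor.predict(test_x)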
def generate_audio(X, Y, seed_X): """ X: array of input sequences Y: next value for each input sequence seed_X: a single sequence to use as seed for generation """ # reshape to input format needed for the NN X = np.reshape(X, (X.shape[0], X.shape[1], 1)) seed_X = np.reshape(seed_X, (seed_X.shape[0], seed_X.shape[1], 1)) # train new model or use pre trained model? USE_SAVED_MODEL = False model_arch_file = 'model_architecture.json' model_weight_file = 'model_weights.h5' print "Architecture file:", model_arch_file print "Weight file:", model_weight_file model = None if USE_SAVED_MODEL: print "Loading model ..." model = model_from_json(open(model_arch_file).read()) model.load_weights(model_weight_file) else: model = Sequential() layers = [1, 10, 20, 1] # add layers model.add(LSTM( input_dim=layers[0], output_dim=layers[1], return_sequences=True, # stateful=True, # batch_input_shape=(32, 49, 1) )) model.add(Dropout(0.2)) model.add(LSTM( layers[2], return_sequences=False, # stateful=True, # batch_input_shape=(32, 49, 1) )) model.add(Dropout(0.2)) model.add(Dense( output_dim=layers[3])) model.add(Activation("linear")) # save model print "Saving model ..." json_string = model.to_json() open(model_arch_file, 'w').write(json_string) model.save_weights(model_weight_file, overwrite=True) # compile model in both cases start = time.time() print "Started compilation: ", start model.compile(loss="mse", optimizer="rmsprop") print "Compilation Time: ", time.time() - start # train if using new model if not USE_SAVED_MODEL: # train model.fit(X, Y, batch_size=32, nb_epoch=5, validation_split=0.05 ) # generate new sequence model.reset_states() gen_seconds = 3 generated = [None for i in range(DEFAULT_RATE) * gen_seconds] # generate 5 seconds of new music print seed_X.shape for i in xrange(DEFAULT_RATE * gen_seconds): sys.stdout.write("\r" + str(float(i)/(DEFAULT_RATE * gen_seconds))) predicted = model.predict(seed_X)[0] generated[i] = predicted seed_X[0,:,0] = np.append(seed_X[0,1:,0], predicted) return np.array(generated)
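A hedged follow-up sketch, not part of the original code, that writes the samples returned by generate_audio() to a WAV file; it assumes X, Y and seed_X are the arrays prepared by the surrounding script and that the generated values are normalized float audio at DEFAULT_RATE:

import numpy as np
from scipy.io import wavfile

generated = generate_audio(X, Y, seed_X)
audio = np.squeeze(np.array(generated)).astype(np.float32)
wavfile.write('generated.wav', DEFAULT_RATE, audio)   # 32-bit float WAV
print "Wrote %d samples (%.1f s) to generated.wav" % (len(audio), float(len(audio)) / DEFAULT_RATE)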