def compression(tr_data, test_data):
    # normalize data
    # Create autoencoder model
    model = autoencoderKeras.create_model(num_of_hidden_layers, len(tr_data[0, :]),
                                          num_of_neurons, layer_activation,
                                          output_activation)
    # pretraining
    model = autoencoderKeras.pretrain(model, tr_data, 0.8, layer_activation,
                                      output_activation, 'RMSprop')
    weights1 = model.get_weights()
    # training
    model = autoencoderKeras.overall_train(model, tr_data, 0.1, 'RMSprop')
    # test of ae on training set
    tr_pred = model.predict(tr_data)
    # test of ae on test set
    test_pred = model.predict(test_data)
    # coding model (encoder half only)
    code_model = Sequential()
    for i in range(num_of_hidden_layers):
        if i == 0:
            code_model.add(Dense(num_of_neurons[i], activation='relu',
                                 input_dim=len(tr_data[0, :])))
        else:
            code_model.add(Dense(num_of_neurons[i], activation='relu'))
    # copy trained weights
    weights = model.get_weights()
    code_model.set_weights(weights[0:2 * num_of_hidden_layers])
    # codes
    test_code = code_model.predict(test_data)
    tr_code = code_model.predict(tr_data)
    return tr_pred, test_pred, tr_code, test_code, weights1

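# Why the slice weights[0:2*num_of_hidden_layers] above isolates the encoder:
# every Dense layer contributes exactly two arrays (kernel, bias) to
# get_weights(). A minimal, self-contained check with toy sizes (the layer
# widths here are illustrative, not from the original):
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

toy_ae = Sequential([
    Dense(3, activation='relu', input_dim=4),   # encoder layer 1
    Dense(2, activation='relu'),                # encoder layer 2
    Dense(3, activation='relu'),                # decoder layer 1
    Dense(4, activation='linear'),              # decoder layer 2
])
toy_weights = toy_ae.get_weights()
assert len(toy_weights) == 8  # 4 Dense layers -> 4 (kernel, bias) pairs

toy_encoder = Sequential([
    Dense(3, activation='relu', input_dim=4),
    Dense(2, activation='relu'),
])
toy_encoder.set_weights(toy_weights[0:4])  # first 2 * num_hidden_layers arrays

x = np.random.random((5, 4)).astype('float32')
assert toy_encoder.predict(x).shape == (5, 2)
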
def test_saving_overwrite_option_gcs():
    model = Sequential()
    model.add(Dense(2, input_shape=(3,)))
    org_weights = model.get_weights()
    new_weights = [np.random.random(w.shape) for w in org_weights]
    with tf_file_io_proxy('keras.engine.saving.tf_file_io') as file_io_proxy:
        # We should not use the same filename in several tests, to allow for
        # parallel execution.
        gcs_filepath = file_io_proxy.get_filepath(
            filename='test_saving_overwrite_option_gcs.h5')
        save_model(model, gcs_filepath)
        model.set_weights(new_weights)
        with patch('keras.engine.saving.ask_to_proceed_with_overwrite') as ask:
            # Declining the prompt must leave the original file untouched.
            ask.return_value = False
            save_model(model, gcs_filepath, overwrite=False)
            ask.assert_called_once()
            new_model = load_model(gcs_filepath)
            for w, org_w in zip(new_model.get_weights(), org_weights):
                assert_allclose(w, org_w)
            # Accepting the prompt overwrites with the new weights.
            ask.return_value = True
            save_model(model, gcs_filepath, overwrite=False)
            assert ask.call_count == 2
            new_model = load_model(gcs_filepath)
            for w, new_w in zip(new_model.get_weights(), new_weights):
                assert_allclose(w, new_w)
        file_io_proxy.delete_file(gcs_filepath)  # cleanup

def test_saving_overwrite_option():
    model = Sequential()
    model.add(Dense(2, input_shape=(3,)))
    org_weights = model.get_weights()
    new_weights = [np.random.random(w.shape) for w in org_weights]
    _, fname = tempfile.mkstemp('.h5')
    save_model(model, fname)
    model.set_weights(new_weights)
    with patch('keras.engine.saving.ask_to_proceed_with_overwrite') as ask:
        ask.return_value = False
        save_model(model, fname, overwrite=False)
        ask.assert_called_once()
        new_model = load_model(fname)
        for w, org_w in zip(new_model.get_weights(), org_weights):
            assert_allclose(w, org_w)
        ask.return_value = True
        save_model(model, fname, overwrite=False)
        assert ask.call_count == 2
        new_model = load_model(fname)
        for w, new_w in zip(new_model.get_weights(), new_weights):
            assert_allclose(w, new_w)
    os.remove(fname)

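# For orientation, the behavior under test without any mocking: a plain
# save/load round-trip preserves the weights exactly. A minimal sketch using
# a temp file:
import os
import tempfile
import numpy as np
from keras.models import Sequential, save_model, load_model
from keras.layers import Dense

rt_model = Sequential([Dense(2, input_shape=(3,))])
_, rt_path = tempfile.mkstemp('.h5')
save_model(rt_model, rt_path)                 # first write, no prompt needed
reloaded = load_model(rt_path)
for w, rw in zip(rt_model.get_weights(), reloaded.get_weights()):
    np.testing.assert_allclose(w, rw)
os.remove(rt_path)
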
def build_overkill_stacked_lstm_regularized_dropout(dx, dh, do, length, weights=None):
    model = Sequential()
    model.add(LSTM(dh, input_dim=dx, return_sequences=True,
                   W_regularizer='l2', U_regularizer='l2', b_regularizer='l2'))
    model.add(Dropout(0.2))
    model.add(LSTM(512, input_dim=dh, return_sequences=True,
                   W_regularizer='l2', U_regularizer='l2', b_regularizer='l2'))
    model.add(Dropout(0.2))
    model.add(LSTM(do, input_dim=512, return_sequences=True, activation='linear',
                   W_regularizer='l2', U_regularizer='l2', b_regularizer='l2'))
    if weights is not None:
        model.set_weights(weights)
    return model

def build_train_lstm_mse(dx, dh, do, span=1, weights=None, batch_size=2):
    model = Sequential()
    model.add(LSTM(dh, input_dim=dx, return_sequences=False))
    model.add(Dense(do))
    if weights is not None:
        model.set_weights(weights)
    return model

def build_test_rnn_mse(dx, dh, do, weights=None):
    model = Sequential()
    model.add(SimpleRNN(dh, input_dim=dx, return_sequences=True))
    model.add(TimeDistributed(Dense(do)))
    if weights is not None:
        model.set_weights(weights)
    return model

def build_simple_rnn_stateful(dx, dh, do, length, weights=None, batch_size=1):
    model = Sequential()
    model.add(SimpleRNN(dh, batch_input_shape=(batch_size, 1, dx),
                        return_sequences=True, stateful=True))
    model.add(TimeDistributed(Dense(do)))
    if weights is not None:
        model.set_weights(weights)
    return model

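# Usage sketch for the stateful builder above: with stateful=True the RNN
# carries its hidden state across predict() calls, so state must be cleared
# manually between independent sequences. Toy shapes; assumes the builder
# above is in scope:
import numpy as np

stateful_model = build_simple_rnn_stateful(dx=3, dh=4, do=2, length=1)
stateful_model.compile(loss='mse', optimizer='adam')

sequence = np.random.random((10, 1, 1, 3)).astype('float32')
for step in sequence:              # feed one timestep at a time
    stateful_model.predict(step)   # each step has shape (1, 1, 3)
stateful_model.reset_states()      # clear state before the next sequence
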
def build_softmax_rnn(dx, dh, do, length, weights=None):
    model = Sequential()
    model.add(SimpleRNN(dh, input_dim=dx, return_sequences=True))
    # The activation belongs on the wrapped Dense layer;
    # TimeDistributed itself takes no `activation` argument.
    model.add(TimeDistributed(Dense(do, activation='softmax')))
    if weights is not None:
        model.set_weights(weights)
    return model

def build_test_lstm_softmax(dx, dh, do, weights=None):
    model = Sequential()
    model.add(LSTM(dh, input_dim=dx, return_sequences=True))
    model.add(TimeDistributed(Dense(do)))
    model.add(TimeDistributed(Activation('softmax')))
    if weights is not None:
        model.set_weights(weights)
    return model

def build_test_lstm_mse(dx, dh, do, weights=None):
    model = Sequential()
    model.add(LSTM(dh, input_dim=dx, return_sequences=True))
    model.add(TimeDistributed(Dense(do)))
    if weights is not None:
        print(len(weights))
        model.set_weights(weights)
    return model

def build_lstm_stateful_softmax(dx, dh, do, length=1, weights=None, batch_size=1):
    model = Sequential()
    model.add(LSTM(dh, batch_input_shape=(batch_size, length, dx),
                   return_sequences=False, stateful=True))
    model.add(Dense(do))
    model.add(Activation('softmax'))
    if weights is not None:
        model.set_weights(weights)
    return model

def test():
    with open("save_weight.pickle", mode="rb") as f:
        weights = pickle.load(f)
    model = Sequential()
    model.add(Dense(output_dim=100, input_dim=28 * 28))
    model.add(Activation("relu"))
    model.set_weights(weights)
    layer1_value = model.predict(X_test[:5])
    y_pred = np_utils.categorical_probas_to_classes(layer1_value)
    Y = np_utils.categorical_probas_to_classes(y_test)
    print(np_utils.accuracy(y_pred, Y))
    print(y_pred.shape)

def build_stacked_rnn(dx, dh, do, length, weights=None):
    model = Sequential()
    model.add(SimpleRNN(dh, input_dim=dx, return_sequences=True))
    model.add(SimpleRNN(do, input_dim=dh, return_sequences=True))
    if weights is not None:
        model.set_weights(weights)
    return model

def build_stacked_lstm(dx, dh, do, length, weights=None):
    model = Sequential()
    model.add(LSTM(dh, input_dim=dx, return_sequences=True))
    model.add(LSTM(do, input_dim=dh, return_sequences=True))
    model.add(TimeDistributed(Dense(do)))
    if weights is not None:
        model.set_weights(weights)
    return model

def build_train_stacked_lstm_dropout_softmax(dx, dh, do, span=1, weights=None,
                                             batch_size=2):
    model = Sequential()
    model.add(LSTM(dh, input_dim=dx, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(dh, input_dim=dh, return_sequences=False))
    model.add(Dense(do))
    model.add(Activation('softmax'))
    if weights is not None:
        model.set_weights(weights)
    return model

def build_stacked_lstm_mse_stateful(dx, dh, do, length, weights=None, batch_size=5):
    model = Sequential()
    model.add(LSTM(dh, batch_input_shape=(batch_size, 1, dx),
                   return_sequences=True, stateful=True))
    model.add(LSTM(do, batch_input_shape=(batch_size, 1, dh),
                   return_sequences=True, stateful=True))
    model.add(TimeDistributed(Dense(do)))
    if weights is not None:
        model.set_weights(weights)
    return model

class brain:
    def __init__(self, model):
        if model is None:
            self.model = Sequential()
            self.model.add(Dense(8, activation="tanh", input_dim=6,
                                 kernel_initializer=initializers.RandomUniform(
                                     minval=-1, maxval=1, seed=None)))
            self.model.add(Dense(3, activation="tanh",
                                 kernel_initializer=initializers.RandomUniform(
                                     minval=-1, maxval=1, seed=None)))
            self.model.compile(loss='mean_squared_error', optimizer='adam')
        else:
            self.model = model

    def getOutputs(self, inputs):
        inputs.append(1)
        return self.model.predict(np.asarray([inputs]))

    def mutate(self, brain1, brain2):
        newBrain = []
        # get_weights() alternates (kernel, bias); step by 2 to visit kernels.
        for i in range(0, len(self.model.get_weights()), 2):
            newWeights = []
            b1weights = brain1.model.get_weights()[i]
            b2weights = brain2.model.get_weights()[i]
            for n in range(len(b1weights)):
                w = []
                for m in range(len(b1weights[0])):
                    r = random()
                    k = 0
                    if random() < 0.1:
                        k = randint(-100, 100) / 100
                    if r < 0.4:
                        w.append(b1weights[n][m] + k)
                    elif r > 0.6:
                        w.append(b2weights[n][m] + k)
                    else:
                        w.append((b1weights[n][m] + b2weights[n][m]) / 2 + k)
                newWeights.append(w)
            # set_weights expects arrays, not nested Python lists.
            newBrain.append(np.array(newWeights))
            # Keep this network's own bias vector unchanged.
            newBrain.append(self.model.get_weights()[i + 1])
        self.model.set_weights(newBrain)

def test_save_load_weights_gcs():
    model = Sequential()
    model.add(Dense(2, input_shape=(3,)))
    org_weights = model.get_weights()
    with tf_file_io_proxy('keras.engine.saving.tf_file_io') as file_io_proxy:
        # We should not use the same filename in several tests, to allow for
        # parallel execution.
        gcs_filepath = file_io_proxy.get_filepath(
            filename='test_save_load_weights_gcs.h5')
        model.save_weights(gcs_filepath)
        model.set_weights([np.random.random(w.shape) for w in org_weights])
        for w, org_w in zip(model.get_weights(), org_weights):
            assert not (w == org_w).all()
        model.load_weights(gcs_filepath)
        for w, org_w in zip(model.get_weights(), org_weights):
            assert_allclose(w, org_w)
        file_io_proxy.delete_file(gcs_filepath)  # cleanup

def test_EarlyStopping_reuse():
    patience = 3
    data = np.random.random((100, 1))
    labels = np.where(data > 0.5, 1, 0)
    model = Sequential((
        Dense(1, input_dim=1, activation='relu'),
        Dense(1, activation='sigmoid'),
    ))
    model.compile(optimizer='sgd', loss='binary_crossentropy',
                  metrics=['accuracy'])
    stopper = callbacks.EarlyStopping(monitor='acc', patience=patience)
    weights = model.get_weights()

    hist = model.fit(data, labels, callbacks=[stopper])
    assert len(hist.epoch) >= patience

    # Reusing the same callback should again allow training to go for at
    # least `patience` epochs.
    model.set_weights(weights)
    hist = model.fit(data, labels, callbacks=[stopper])
    assert len(hist.epoch) >= patience

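# Why reusing the same callback instance works: fit() invokes
# on_train_begin(), which re-initializes EarlyStopping's internal counters,
# so a second fit() starts with a fresh patience budget. A direct check
# (relies on internal attributes `wait` and `stopped_epoch`, which may vary
# across Keras versions):
from keras import callbacks

reused_stopper = callbacks.EarlyStopping(monitor='acc', patience=3)
reused_stopper.on_train_begin()
assert reused_stopper.wait == 0
assert reused_stopper.stopped_epoch == 0
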
class RNNBuilder(NNBuilder):
    """Recurrent neural network builder."""

    def build_model(self, layers, cell=LSTM, weights=None,
                    dense_activation='tanh', verbose=0, **kwargs):
        # self.hidden_layers = len(layers) - 2
        # self.layers = layers
        # self.input_dim = layers[0]
        # self.output_dim = layers[-1]
        self.model = Sequential()
        for i in range(len(layers) - 2):
            self.model.add(cell(
                # Keras API 2
                input_shape=(None, layers[i]),
                units=layers[i + 1],
                # Keras API 1
                # input_dim=layers[i],
                # output_dim=layers[i+1],
                kernel_initializer='zeros',
                recurrent_initializer='zeros',
                bias_initializer='zeros',
                # Uncomment to use last batch state to init next training step.
                # Specify shuffle=False when calling fit().
                # batch_size=batch_size, stateful=True,
                return_sequences=True if i < (len(layers) - 3) else False))
        self.model.add(Dense(layers[-1], activation=dense_activation,
                             kernel_initializer='zeros',
                             bias_initializer='zeros'))
        if weights:
            self.model.set_weights(weights)
        self.trainable_params = int(np.sum(
            [K.count_params(p) for p in set(self.model.trainable_weights)]))
        if verbose > 1:
            self.model.summary()
        return self.model

def train_model(feature_layers, classification_layers, image_list, nb_epoch,
                nb_classes, img_rows, img_cols, weights=None):
    # Create test set data for cross-validation.
    num_images = len(image_list)
    test_size = int(0.2 * num_images)
    print("Train size: ", num_images - test_size)
    print("Test size: ", test_size)

    model = Sequential()
    for l in feature_layers + classification_layers:
        model.add(l)
    if weights is not None:
        model.set_weights(weights)

    # let's train the model using SGD + momentum (how original).
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd)

    print('Using real time data augmentation')
    for e in range(nb_epoch):
        print('-' * 40)
        print('Epoch', e)
        print('-' * 40)
        print('Training...')
        # Batch train with realtime data augmentation.
        progbar = generic_utils.Progbar(num_images - test_size)
        for X_batch, Y_batch in flow(image_list[0:-test_size]):
            X_batch = X_batch.reshape(X_batch.shape[0], 3, img_rows, img_cols)
            Y_batch = np_utils.to_categorical(Y_batch, nb_classes)
            loss = model.train_on_batch(X_batch, Y_batch)
            progbar.add(X_batch.shape[0], values=[('train loss', loss)])

        print('Testing...')
        # Test time!
        progbar = generic_utils.Progbar(test_size)
        for X_batch, Y_batch in flow(image_list[-test_size:]):
            X_batch = X_batch.reshape(X_batch.shape[0], 3, img_rows, img_cols)
            Y_batch = np_utils.to_categorical(Y_batch, nb_classes)
            score = model.test_on_batch(X_batch, Y_batch)
            progbar.add(X_batch.shape[0], values=[('test loss', score)])

    return model, model.get_weights()

class Brain:
    def __init__(self, model):
        if model is None:
            self.model = Sequential()
            self.model.add(Dense(12, input_dim=6, activation="tanh",
                                 kernel_initializer=initializers.RandomUniform(
                                     minval=-1, maxval=1, seed=None)))
            # self.model.add(Dense(20, activation="tanh",
            #     kernel_initializer=initializers.RandomUniform(minval=-1, maxval=1, seed=None)))
            # self.model.add(Dense(20, activation="tanh",
            #     kernel_initializer=initializers.RandomUniform(minval=-1, maxval=1, seed=None)))
            # self.model.add(Dense(20, activation="tanh",
            #     kernel_initializer=initializers.RandomUniform(minval=-1, maxval=1, seed=None)))
            self.model.add(Dense(3, activation="tanh",
                                 kernel_initializer=initializers.RandomUniform(
                                     minval=-1, maxval=1, seed=None)))
            self.model.compile(optimizer='sgd', loss='mean_squared_error')
        else:
            self.model = model

    def getOutputs(self, inputs):
        return self.model.predict(np.asarray([inputs]))

    def breed(self, brain1, brain2):
        newBrain = []
        for i in range(0, len(self.model.get_weights()), 2):
            newWeights = []
            b1weights = brain1.model.get_weights()[i]
            b2weights = brain2.model.get_weights()[i]
            for j in range(len(b1weights)):
                w = []
                for k in range(len(b1weights[0])):
                    r = random()
                    if r > 0.8:
                        # Occasionally mutate the inherited gene slightly.
                        genome = choice([b1weights[j][k], b2weights[j][k]])
                        w.append(genome + randint(-200, 200) / 1000)
                    else:
                        w.append(choice([b1weights[j][k], b2weights[j][k]]))
                newWeights.append(w)
            # set_weights expects arrays, not nested Python lists.
            newBrain.append(np.array(newWeights))
            newBrain.append(self.model.get_weights()[i + 1])
        self.model.set_weights(newBrain)

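# The nested crossover loops above can be vectorized with numpy masks. A
# hypothetical helper (the names `crossover`, `swap_prob`, `mutation_scale`
# and `mutation_prob` are illustrative, not from the original classes):
import numpy as np

def crossover(weights_a, weights_b, swap_prob=0.2, mutation_scale=0.2,
              mutation_prob=0.1):
    """Mix two parents' weight lists element-wise and add sparse noise."""
    child = []
    for wa, wb in zip(weights_a, weights_b):
        pick_b = np.random.random(wa.shape) < swap_prob     # gene source mask
        genome = np.where(pick_b, wb, wa)
        mutate = np.random.random(wa.shape) < mutation_prob  # sparse mutation
        noise = np.random.uniform(-mutation_scale, mutation_scale, wa.shape)
        child.append((genome + mutate * noise).astype(wa.dtype))
    return child

# Example hand-off back into a model:
# child.model.set_weights(crossover(parent1.model.get_weights(),
#                                   parent2.model.get_weights()))
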
def _test_equivalence(channel_order=None):
    from kfs.layers.convolutional import Convolution2DEnergy_TemporalBasis
    # Assumed to live alongside the TemporalBasis layer in kfs.
    from kfs.layers.convolutional import Convolution2DEnergy_TemporalCorrelation
    from keras.models import Sequential
    # from keras.layers import Flatten, Dense

    input_shape = (12, 3, 64, 64)
    if channel_order is None:
        channel_order = K.image_data_format()
    if channel_order == 'channels_last':
        input_shape = (12, 64, 64, 3)

    nn = Sequential()
    nn.add(Convolution2DEnergy_TemporalBasis(8, 16, 4, (5, 5), 7,
                                             padding='same',
                                             input_shape=input_shape,
                                             data_format=channel_order))
    rng = np.random.RandomState(42)
    datums = rng.randn(6, 12, 3, 64, 64).astype('float32')
    if channel_order == 'channels_last':
        datums = datums.transpose(0, 1, 3, 4, 2)
    nn.compile(loss='mse', optimizer='sgd')

    nn2 = Sequential()
    nn2.add(Convolution2DEnergy_TemporalCorrelation(8, 16, 4, (5, 5), 7,
                                                    padding='same',
                                                    input_shape=input_shape,
                                                    data_format=channel_order))
    nn2.compile(loss='mse', optimizer='sgd')
    nn2.set_weights(nn.get_weights())

    pred1 = nn.predict(datums)
    pred2 = nn2.predict(datums)
    assert ((pred1 - pred2) == 0.).all()
    return nn, nn.predict(datums), nn2, nn2.predict(datums)

class LSTM_model(NN):
    """
    Class for setting up an LSTM. The adjustable parameters are:
        - input data, which will be split automatically into training,
          validation and testing datasets
        - batch size, which affects model training speed and accuracy
        - number of training epochs
        - number of neurons
        - number of timesteps between which state is kept
    While training, the model uses evaluation data at the end of each epoch
    to evaluate itself, which results in much faster training. Furthermore,
    there are actually two models being used: one for training and one for
    testing. The training model has a different batch size for faster
    training. The testing model will always have a batch size of 1, as you
    usually want to make predictions from one timeseries at a time.
    """

    def __init__(self, data=None, batch_size=None, nb_epoch=None, neurons=None,
                 time_steps=None):
        """
        Initialization of the LSTM class.

        Args:
            data: str: path to netcdf-file
            batch_size: int: number of samples trained at the same time
            nb_epoch: int: number of epochs
            neurons: int: number of neurons
            time_steps: int: number of timesteps used for training AND prediction
        """
        self.data = data
        self.batch_size = batch_size
        self.nb_epoch = nb_epoch
        self.neurons = neurons
        self.data_dim = 18432
        self.time_steps = time_steps
        self.path = ''

    def getdata(self, file):
        """
        Loads and scales data with ``DataHandler`` and ``scale``.

        Args:
            file: str: file name
        """
        f0 = (4 * 1.5) * self.batch_size * self.time_steps + self.time_steps + 1
        f1 = f0 + (4 * 0.25) * self.batch_size * self.time_steps \
            + self.time_steps + 1
        print(f0)
        _data = DataHandler().get_var(file, var_name="var167")
        _data = DataHandler().shape(_data)
        _data_data = scale().T(_data[:int(f0)])
        _valid_data = scale().T(_data[int(f0):int(f1)])
        _test_data = scale().T(_data[int(f1):])
        if self.data is None:
            self.data = _data_data
            self.valid_data = _valid_data
            self.test_data = _test_data
        else:
            self.data.value = np.concatenate(
                (self.data.value, _data_data.value), axis=0)

    def init_model(self, batch_size=None, nb_epoch=None, neurons=None):
        """
        Initializes the LSTM model. If not set already, batch_size, epochs
        and neurons can be set or reset.

        Args:
            batch_size: int: number of samples trained at the same time
            nb_epoch: int: number of epochs
            neurons: int: number of neurons
        """
        # get arguments:
        if batch_size:
            self.batch_size = batch_size
        if nb_epoch:
            self.nb_epoch = nb_epoch
        if neurons:
            self.neurons = neurons
        # make sure everything is set before continuing
        assert self.neurons
        assert self.nb_epoch
        assert self.batch_size
        assert self.time_steps
        assert self.data_dim
        # setup model:
        self.model = Sequential()
        self.model.add(LSTM(
            units=self.neurons,
            batch_size=self.batch_size,
            # Within one batch, state is still kept; stateful=False only
            # means state is reset between batches.
            stateful=False,
            input_shape=(self.time_steps, self.data_dim)))
        # Does not really do much; just brings the output into the right shape.
        self.model.add(Dense(self.data_dim))
        self.model.compile(loss='mean_squared_error', optimizer='adam')
        # self.model.compile(loss='mean_squared_error', optimizer=optimizers.Adadelta())
        return self

    def createGenerators(self):
        """
        Creates generators for the training, evaluation and testing data, as
        they make life here a whole lot easier. The input is split into
        training (2/3 of input data), evaluation (1/6) and testing data (1/6).

        Returns: sets the class variables:
            - train_gen: Generator for training data
            - valid_gen: Generator for validation data
            - test_gen: Generator for testing data
        """
        # f0 = 64 * 12 + 12 + 1
        f0 = (4 * 1.5) * self.batch_size * self.time_steps + self.time_steps + 1
        f1 = f0 + (4 * 0.25) * self.batch_size * self.time_steps \
            + self.time_steps + 1
        print(f0)
        print(f1)
        print(len(self.data.value))
        print(self.data.value[int(f1):int(f1) + 12].shape)
        # shuffle=False is very important as we are dealing with a
        # continuous timeseries.
        self.train_gen = TimeseriesGenerator(
            self.data.value[:int(f0)], self.data.value[:int(f0)],
            sampling_rate=1, shuffle=False, length=self.time_steps,
            batch_size=self.batch_size)
        self.valid_gen = TimeseriesGenerator(
            self.data.value[int(f0):int(f1)], self.data.value[int(f0):int(f1)],
            sampling_rate=1, shuffle=False, length=self.time_steps,
            batch_size=self.batch_size)
        self.test_gen = TimeseriesGenerator(
            self.data.value[int(f1):], self.data.value[int(f1):],
            sampling_rate=1, shuffle=False, length=self.time_steps,
            batch_size=1)

    def create_ensemble_generator(self):
        self.train_gen = TimeseriesGenerator(
            self.data.value, self.data.value,
            sampling_rate=1, shuffle=True, length=self.time_steps,
            batch_size=self.batch_size)
        self.valid_gen = TimeseriesGenerator(
            self.valid_data.value, self.valid_data.value,
            sampling_rate=1, shuffle=True, length=self.time_steps,
            batch_size=self.batch_size)
        self.test_gen = TimeseriesGenerator(
            self.test_data.value, self.test_data.value,
            sampling_rate=1, shuffle=False, length=self.time_steps,
            batch_size=1)

    def fit_model(self):
        """
        Fits the model to the input data and parameters using the
        generators. At the end, the prediction model will be initialized
        automatically and will replace the training model.
        """
        # TensorBoard cannot be used with a validation generator in the
        # current Keras version.
        # tb_callback = TensorBoard(...)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5)
        csv_logger = CSVLogger(self.path + 'training.log')
        # The History callback returns loss and validation loss per epoch.
        # history = History()
        callbacks = []
        # callbacks.append(tb_callback)
        callbacks.append(reduce_lr)
        # callbacks.append(history)
        callbacks.append(csv_logger)
        self.model.fit_generator(self.train_gen,
                                 shuffle=False,
                                 epochs=self.nb_epoch,
                                 validation_data=self.valid_gen,
                                 verbose=1,
                                 callbacks=callbacks)
        # self.history = history.history

    def init_pred_model(self):
        """
        Initializes a new model for prediction with the already trained
        weights. The new model is exactly the same as the old one, with only
        the batch size differing.
        """
        self.training_model = self.model
        weights = self.training_model.get_weights()
        self.model = Sequential()
        self.model.add(LSTM(
            units=self.neurons,
            batch_size=1,
            # return_sequences=True,
            stateful=False,
            input_shape=(self.time_steps, self.data_dim)))
        self.model.add(Dense(self.data_dim))
        self.model.set_weights(weights)
        self.model.compile(loss='mean_squared_error', optimizer='adam')
        # self.model.compile(loss='mean_squared_error', optimizer=optimizers.Adadelta())

    def evaluate(self):
        """
        Makes predictions with the testing model using the testing data
        generator.

        Returns: tuple (py, preds)
            py: numpy array of target values (truth values)
            preds: numpy array of LSTM predictions for the targets
        """
        print("Evaluating the model...")
        py = np.zeros([len(self.test_gen), self.data_dim])
        for i in range(len(self.test_gen)):
            py[i] = self.test_gen[i][1][0][:]
        preds = self.model.predict_generator(self.test_gen)
        # print("Truth: %.5f | Prediction: %.5f " % (test_y * scaler, p[0] * scaler))
        # pred_model.evaluate_generator(test_gen)
        return py, preds

    def predict(self, value):
        return self.model.predict(value)

    def scale(self, var="T"):
        pass

    def scale_invert(self, value):
        return self.data.scaler.inverse_transform(value)

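# For orientation, TimeseriesGenerator (used throughout the class above)
# pairs each window of `length` consecutive steps with the value that
# immediately follows it. A tiny check on toy data:
import numpy as np
from keras.preprocessing.sequence import TimeseriesGenerator

series = np.arange(10).reshape(-1, 1).astype('float32')
gen = TimeseriesGenerator(series, series, length=3, batch_size=1)

x0, y0 = gen[0]
assert (x0[0].ravel() == np.array([0., 1., 2.])).all()  # first window
assert y0[0] == 3.                                      # target = next step
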
import os

import numpy as np
from keras.layers import Dense
from keras.models import Sequential

from studio import fs_tracker

model = Sequential()
model.add(Dense(2, input_shape=(2,)))

weights = model.get_weights()
# A Dense layer holds two arrays (kernel, bias), so supply both.
new_weights = [np.array([[2., 0.], [0., 2.]]), np.zeros(2)]
# print(weights)
# new_weights = []
# for weight in weights:
#     new_weights.append(weight + 1)

model.set_weights(new_weights)
model.save(os.path.join(fs_tracker.get_model_directory(), 'weights.h5'))

# training
model = autoencoderKeras.overall_train(model, tr_set_st, 0.5)
# test of ae on training set
tr = model.predict(tr_set_st)
tr_pred = denormalize(tr, tr_data_stands)
# test of ae on test set
test = model.predict(test_set_st)
test_pred = denormalize(test, test_data_stands)
# coding model
code_model = Sequential()
code_model.add(Dense(4096, activation='sigmoid', input_dim=len(tr_set_st[0, :])))
code_model.add(Dense(512, activation='sigmoid'))
code_model.add(Dense(64, activation='sigmoid'))
# copy trained weights: the three encoder Dense layers own the first six arrays
weights = model.get_weights()
code_model.set_weights(weights[0:6])
# codes
test_code = code_model.predict(test_set_st)
tr_code = code_model.predict(tr_set_st)
# linear svm on raw training
clf = SVC(class_weight='balanced')
clf.fit(tr_code, tr_labels)
predicted_labels = clf.predict(tr_code)
conf_mat = confusion_matrix(tr_labels, predicted_labels)
print(conf_mat)
predicted_labels = clf.predict(test_code)
conf_mat = confusion_matrix(test_labels, predicted_labels)
print(conf_mat)

model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu', kernel_regularizer=l2(1e-5)))
model.add(Dense(num_classes, kernel_regularizer=l2(1e-5)))
model.add(Activation('softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.SGD(lr=0.01, momentum=0.9),
              metrics=['accuracy'])

# Keep the initial weights so both optimizers start from the same point.
W = model.get_weights()

# Train with SVRG
s_svrg = time.time()
model.set_weights(W)
SVRG(model, B=0, B_over_b=len(x_train) // batch_size).fit(
    x_train, y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test))
e_svrg = time.time()
score_svrg = model.evaluate(x_test, y_test, verbose=0)

# Train with uniform sampling
s_uniform = time.time()
model.set_weights(W)
model.fit(x_train,

class DQN:
    def __init__(self, args, num_actions, num_observations):
        self.args = args
        self.num_actions = num_actions
        self.num_observations = num_observations

        # Create the model that will be trained.
        self.model = Sequential()
        self.model.add(Dense(output_dim=self.args.layer_size_1,
                             activation=self.args.layer_activation_1,
                             input_dim=self.num_observations))
        self.model.add(Dense(output_dim=self.args.layer_size_2,
                             activation=self.args.layer_activation_2))
        self.model.add(Dense(output_dim=self.num_actions, activation='linear'))
        self.model.compile(loss='mean_squared_error',
                           optimizer=self.args.optimizer)

        # Create the model that will calculate target values.
        self.model_target = Sequential.from_config(self.model.get_config())
        self.model_target.set_weights(self.model.get_weights())
        self.model_target.compile(loss='mean_squared_error',
                                  optimizer=self.args.optimizer)

        # Create the replay memory.
        self.replay_memory = deque(maxlen=self.args.memory_size)

        self.current_epsilon = self.args.maximum_epsilon
        self.current_learning_rate = self.args.learning_rate
        self.train_iterations = 0
        self.first_iterations = 0
        self.current_episode = 0
        self.current_step = 0
        self.average_train_rewards = deque(maxlen=100)
        self.average_test_rewards = deque(maxlen=100)

        self.train_path = args.save_path + ".train"
        self.train_file = open(self.train_path, 'w')
        self.train_file.write('episode reward average_reward\n')
        self.test_path = args.save_path + ".test"
        self.test_file = open(self.test_path, 'w')
        self.test_file.write('episode reward\n')

    def __del__(self):
        self.train_file.close()
        self.test_file.close()

    def get_random_action(self):
        return np.random.randint(0, self.num_actions)

    def get_action(self, observation, training):
        # Get the current action from the model, or a random action,
        # depending on the arguments and the current episode.
        if training and np.random.random() < self.current_epsilon:
            # If training, sometimes choose random actions.
            return np.random.randint(0, self.num_actions)
        else:
            # Choose a (tie-broken) greedy action from the model.
            q_values = self.model.predict(
                np.array(observation).reshape(1, self.num_observations))
            max_q = q_values.max(axis=1)
            action_choices = []
            for i, q in enumerate(q_values[0]):
                if q == max_q:
                    action_choices.append(i)
            return np.random.choice(action_choices)

    def add_transaction(self, state, action, next_state, reward, terminal):
        # Add a transaction to replay memory; should be called after
        # performing an action and getting an observation.
        self.replay_memory.append((state, action, next_state, reward, terminal))
        self.current_step += 1
        # Make end-of-episode checks.
        if terminal:
            self.end_of_episode()

    def end_of_episode(self):
        self.current_episode += 1
        self.current_step = 0
        if self.current_epsilon > self.args.minimum_epsilon:
            self.decay_epsilon()
        else:
            self.current_epsilon = self.args.minimum_epsilon
        if self.current_episode % self.args.learning_rate_decay_ep == 0:
            self.decay_learning_rate()

    def sample_memory(self, batch_size):
        # Sample the replay memory, returning batch_size random transactions.
        assert len(self.replay_memory) >= batch_size
        return [self.replay_memory[i]
                for i in np.random.choice(len(self.replay_memory), batch_size)]

    def train_model(self):
        if len(self.replay_memory) < self.args.memory_samples:
            print('Not enough transactions in replay memory to train.')
            return
        if (self.args.target_copy_iterations > 0
                and self.train_iterations >= self.args.target_copy_iterations):
            self.update_target_network()
        if (self.args.target_copy_iterations > 0
                and self.first_iterations < self.args.target_copy_start_steps):
            # Update the target network a few times on episode 0 so the model
            # isn't training toward a completely random network.
            self.update_target_network()
            self.first_iterations += 1

        samples = self.sample_memory(self.args.memory_samples)
        observations = next_observations = rewards = np.array([])
        actions = terminals = np.array([], dtype=int)
        for transaction in samples:
            observations = np.append(observations, transaction[0])
            actions = np.append(actions, transaction[1])
            next_observations = np.append(next_observations, transaction[2])
            rewards = np.append(rewards, transaction[3])
            terminals = np.append(terminals, transaction[4])
        observations = observations.reshape(self.args.memory_samples,
                                            self.num_observations)
        next_observations = next_observations.reshape(self.args.memory_samples,
                                                      self.num_observations)

        if self.args.target_copy_iterations == 0:
            # This instance is not using a target copy network; bootstrap
            # from the original model.
            targets = self.model.predict(observations)
            updates = rewards + (1. - terminals) * self.args.future_discount \
                * self.model.predict(next_observations).max(axis=1)
        else:
            # This instance uses a target copy network.
            targets = self.model_target.predict(observations)
            updates = rewards + (1. - terminals) * self.args.future_discount \
                * self.model_target.predict(next_observations).max(axis=1)
        for i, action in enumerate(actions):
            targets[i][action] = updates[i]

        self.model.fit(observations, targets, nb_epoch=1,
                       batch_size=self.args.memory_samples, verbose=0)
        self.train_iterations += 1

    def update_target_network(self):
        self.model_target.set_weights(self.model.get_weights())

    def decay_epsilon(self):
        self.current_epsilon *= self.args.epsilon_decay

    def decay_learning_rate(self):
        self.current_learning_rate *= self.args.learning_rate_decay

    def write_training_episode(self, episode, reward):
        self.average_train_rewards.append(reward)
        self.train_file.write(str(episode) + ' ')
        self.train_file.write(str(reward) + ' ')
        if len(self.average_train_rewards) >= 100:
            self.train_file.write(str(np.mean(self.average_train_rewards)))
        self.train_file.write('\n')

    def write_testing_episode(self, episode, reward):
        self.average_test_rewards.append(reward)
        self.test_file.write(str(episode) + ' ')
        self.test_file.write(str(reward) + ' ')
        self.test_file.write('\n')

    def save_model(self, file_name):
        file_path = self.args.save_path + '_' + file_name + '.model'
        self.model.save(file_path, True)

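# A common alternative to the hard copy in update_target_network() is a
# Polyak (soft) update that blends the target toward the online weights each
# step. A method sketch to drop into the class above; `tau` is an assumed
# hyperparameter, not from the original:
def soft_update_target_network(self, tau=0.01):
    online = self.model.get_weights()
    target = self.model_target.get_weights()
    # target <- tau * online + (1 - tau) * target, array by array.
    blended = [tau * w + (1.0 - tau) * wt for w, wt in zip(online, target)]
    self.model_target.set_weights(blended)
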
class DnCnn_Class_Train:
    def __init__(self):
        print('Constructor Called')
        self.IMAGE_WIDTH = 60
        self.IMAGE_HEIGHT = 60
        self.CHANNELS = 3
        self.N_SAMPLES = 1105920
        self.N_TRAIN_SAMPLES = 1024000
        self.N_EVALUATE_SAMPLES = 81920
        self.N_LAYERS = 20
        self.Filters = 64
        self.X_TRAIN = np.zeros((self.N_TRAIN_SAMPLES, self.IMAGE_HEIGHT,
                                 self.IMAGE_WIDTH, self.CHANNELS))
        self.Y_TRAIN = np.zeros((self.N_TRAIN_SAMPLES, self.IMAGE_HEIGHT,
                                 self.IMAGE_WIDTH, self.CHANNELS))
        self.X_EVALUATE = np.zeros((self.N_EVALUATE_SAMPLES, self.IMAGE_HEIGHT,
                                    self.IMAGE_WIDTH, self.CHANNELS))
        self.Y_EVALUATE = np.zeros((self.N_EVALUATE_SAMPLES, self.IMAGE_HEIGHT,
                                    self.IMAGE_WIDTH, self.CHANNELS))

        print('train data loading : start')
        path = './Data/'
        xpath_matfile = path + 'inputData' + '.mat'
        xname_matfile = 'inputData'
        x = sio.loadmat(xpath_matfile)
        self.X_TRAIN[:, :, :, :] = x[xname_matfile]
        ypath_matfile = path + 'labels' + '.mat'
        yname_matfile = 'labels'
        y = sio.loadmat(ypath_matfile)
        self.Y_TRAIN[:, :, :, :] = y[yname_matfile]
        print('train data loading : end')

        print('validation data loading : start')
        x = sio.loadmat(path + 'inputDataVal.mat')
        self.X_EVALUATE[:, :, :, :] = x['inputDataVal']
        y = sio.loadmat(path + 'labelsVal.mat')
        self.Y_EVALUATE[:, :, :, :] = y['labelsVal']
        print('validation data loading : end')

    def ModelMaker(self, optim):
        self.myModel = Sequential()
        firstLayer = Convolution2D(
            filters=self.Filters,
            kernel_size=(3, 3),
            strides=(1, 1),
            kernel_initializer=RandomNormal(mean=0.0, stddev=0.001, seed=None),
            padding='same',
            input_shape=(self.IMAGE_WIDTH, self.IMAGE_HEIGHT, self.CHANNELS),
            use_bias=True,
            bias_initializer='zeros')
        self.myModel.add(firstLayer)
        self.myModel.add(Activation('relu'))
        # Middle layers: Conv + BatchNorm + ReLU blocks.
        for i in range(self.N_LAYERS - 2):
            Clayer = Convolution2D(
                filters=self.Filters,
                kernel_size=(3, 3),
                strides=(1, 1),
                kernel_initializer=RandomNormal(mean=0.0, stddev=0.001,
                                                seed=None),
                padding='same',
                use_bias=True,
                bias_initializer='zeros')
            self.myModel.add(Clayer)
            Blayer = BatchNormalization(axis=-1, epsilon=1e-3)
            self.myModel.add(Blayer)
            self.myModel.add(Activation('relu'))
        lastLayer = Convolution2D(
            filters=self.CHANNELS,
            kernel_size=(3, 3),
            strides=(1, 1),
            kernel_initializer=RandomNormal(mean=0.0, stddev=0.001, seed=None),
            padding='same',
            use_bias=True,
            bias_initializer='zeros')
        self.myModel.add(lastLayer)
        self.myModel.compile(loss='mean_squared_error', metrics=[scaled_mse],
                             optimizer=optim)
        print("Model Created")
        self.myModel.summary()

    def loadPrevModel(self, modelFileToLoad):
        self.savedModel = keras.models.load_model(
            modelFileToLoad, custom_objects={'scaled_mse': scaled_mse})
        self.savedModel.summary()
        self.myModel.set_weights(self.savedModel.get_weights())
        self.myModel.summary()

    def trainModelAndSaveBest(self, BATCH_SIZE, EPOCHS, modelFileToSave,
                              logFileToSave):
        csv_logger = CSVLogger(logFileToSave)
        myCallback = callbacks.ModelCheckpoint(modelFileToSave,
                                               monitor='val_loss',
                                               verbose=1,
                                               save_best_only=True,
                                               save_weights_only=False,
                                               mode='auto',
                                               period=1)
        trainHistory = self.myModel.fit(x=self.X_TRAIN,
                                        y=self.Y_TRAIN,
                                        batch_size=BATCH_SIZE,
                                        epochs=EPOCHS,
                                        verbose=1,
                                        callbacks=[csv_logger, myCallback],
                                        validation_data=(self.X_EVALUATE,
                                                         self.Y_EVALUATE))
        return trainHistory

    def reCompileModel(self, optim):
        self.myModel.compile(loss='mean_squared_error', metrics=[scaled_mse],
                             optimizer=optim)

model.add(Dense(units=1, input_shape=(1,)))
model.summary()
model.compile(Adam(lr=0.2), 'mean_squared_error')
model.fit(X, y_true, epochs=50)
y_pred = model.predict(X)
plt.scatter(x=X, y=y_true, data=df)
plt.plot(X, y_pred, color='red')
weights, biases = model.get_weights()

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y_true, test_size=0.2,
                                                    random_state=0)
# Reset the single weight and bias before retraining on the split.
weights[0, 0] = 0.0
biases[0] = 0.0
model.set_weights((weights, biases))
model.fit(X_train, y_train, epochs=50, verbose=0)
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)  # predict from the inputs, not the targets

from sklearn.metrics import mean_squared_error as mse
mse_train = mse(y_train, y_train_pred)
mse_test = mse(y_test, y_test_pred)

base_path = "../../../../../../../../resources/weights/" backend = K.backend() version = keras.__version__ major_version = int(version[0]) if major_version == 2: from keras.layers import Conv2D else: from keras.layers import Convolution2D as Conv2D input_shape=(5, 5, 5) n_out = 6 kernel_size = (3, 3) weights = np.arange(0, kernel_size[0] * kernel_size[1] * input_shape[0] * n_out) weights = weights.reshape((kernel_size[0], kernel_size[1], input_shape[0], n_out)) bias = np.arange(0, n_out) model = Sequential() if major_version == 2: model.add(Conv2D(n_out, kernel_size, input_shape=input_shape)) else: model.add(Conv2D(n_out, kernel_size[0], kernel_size[1], input_shape=input_shape)) model.set_weights([weights, bias]) model.compile(loss='mse', optimizer='adam') print("Saving model with single 2D convolution layer for backend {} and keras major version {}".format(backend, major_version)) model.save("{}conv2d_{}_{}.h5".format(base_path, backend, major_version))
def fit(self, x, y, batch_size=32, epochs=10, verbose=1, callbacks=None,
        validation_split=0., validation_data=None, shuffle=True,
        class_weight=None, sample_weight=None, initial_epoch=0,
        activation='sigmoid', loss='mean_squared_error',
        classloss='categorical_crossentropy', metrics=['mse', 'acc'],
        pre_epoch=10, dropout=0, lr=0.1, decay=1e-6, momentum=0.2,
        nesterov=True, lastLoss='softmax', **kwargs):
    """Trains the model for a fixed number of epochs.

    # Arguments
        x: input data, as a Numpy array or list of Numpy arrays
            (if the model has multiple inputs).
        y: labels, as a Numpy array.
        batch_size: integer. Number of samples per gradient update.
        epochs: integer, the number of epochs to train the model.
        verbose: 0 for no logging to stdout, 1 for progress bar logging,
            2 for one log line per epoch.
        callbacks: list of `keras.callbacks.Callback` instances.
            List of callbacks to apply during training.
            See [callbacks](/callbacks).
        validation_split: float (0. < x < 1). Fraction of the data to use
            as held-out validation data.
        validation_data: tuple (x_val, y_val) or tuple
            (x_val, y_val, val_sample_weights) to be used as held-out
            validation data. Will override validation_split.
        shuffle: boolean or str (for 'batch'). Whether to shuffle the
            samples at each epoch. 'batch' is a special option for dealing
            with the limitations of HDF5 data; it shuffles in batch-sized
            chunks.
        class_weight: dictionary mapping classes to a weight value,
            used for scaling the loss function (during training only).
        sample_weight: Numpy array of weights for the training samples,
            used for scaling the loss function (during training only).
            You can either pass a flat (1D) Numpy array with the same
            length as the input samples (1:1 mapping between weights and
            samples), or in the case of temporal data, you can pass a 2D
            array with shape (samples, sequence_length), to apply a
            different weight to every timestep of every sample. In this
            case you should make sure to specify
            sample_weight_mode="temporal" in compile().
        initial_epoch: epoch at which to start training (useful for
            resuming a previous training run).

    # Returns
        A `History` object. Its `History.history` attribute is a record of
        training loss values and metrics values at successive epochs, as
        well as validation loss values and validation metrics values
        (if applicable).

    # Raises
        RuntimeError: if the model was never compiled.
    """
    # Legacy support
    if 'nb_epoch' in kwargs:
        warnings.warn('The `nb_epoch` argument in `fit` '
                      'has been renamed `epochs`.')
        epochs = kwargs.pop('nb_epoch')
    if kwargs:
        raise TypeError('Unrecognized keyword arguments: ' + str(kwargs))
    if self.model is None:
        raise RuntimeError('The model needs to be compiled '
                           'before being used.')

    hid = self.hid
    sgd = RMSprop(lr=lr)
    if not self.pretrain:
        x_input = x
        decoder_layers = []
        autoencoder = Sequential()
        # Greedy layer-wise pretraining: train one single-hidden-layer
        # autoencoder per layer on the previous layer's codes.
        for i in range(1, len(self.layers)):
            temp = Sequential()
            temp.add(Dense(hid[i], activation=activation,
                           input_shape=(hid[i - 1],)))
            # temp.add(normalization.BatchNormalization())
            temp.add(Dropout(dropout))
            temp.add(Dense(hid[i - 1], activation=activation,
                           input_shape=(hid[i],)))
            temp.compile(loss=loss, optimizer=sgd, metrics=metrics)
            temp.fit(x_input, x_input, batch_size=batch_size, epochs=epochs,
                     shuffle=True, verbose=0)
            decoder_layers.append(temp.layers[-1])
            autoencoder.add(temp.layers[0])
            # func = K.function([autoencoder.model.input],
            #                   [autoencoder.model.layers[-1].get_output_at(0)])
            temp2 = Sequential()
            # print(x_input.shape)
            temp2.add(Dense(hid[i], activation=activation,
                            input_shape=(hid[i - 1],)))
            temp2.set_weights(temp.layers[0].get_weights())
            x_input = temp2.predict(x_input)
            del temp
            del temp2
        decoder_layers.reverse()
        print('after layer by layer pretrain')

        # Fine-tune the full (unrolled) autoencoder.
        for i in range(0, len(self.layers) - 1):
            autoencoder.add(decoder_layers[i])
            # autoencoder.layers[i].set_weights(decoder_layers[i].get_weights())
        autoencoder.compile(loss=loss, optimizer=sgd, metrics=metrics)
        autoencoder.fit(x, x, batch_size=batch_size, epochs=pre_epoch,
                        shuffle=True, verbose=0)

        temp = Sequential()
        for i in range(0, len(self.layers) - 1):
            temp.add(autoencoder.layers[i])
            temp.layers[i].set_weights(autoencoder.layers[i].get_weights())
        if len(y):
            # Supervised fine-tuning with a classification head.
            temp.add(Dense(y.shape[1], activation=lastLoss,
                           input_shape=(hid[-1],)))
            temp.compile(loss=classloss, optimizer=sgd, metrics=metrics)
            temp.fit(x, y, batch_size=batch_size, epochs=pre_epoch,
                     shuffle=True, verbose=0)
            score = temp.evaluate(x, y, batch_size=20, verbose=1)
            print(score)
            print('After supervised Training')
        for i in range(0, len(self.layers)):
            self.layers[i].set_weights(temp.layers[i].get_weights())
        self.layers = temp.layers
        self.pretrain = True
        del temp
        del autoencoder
        del decoder_layers

class KerasRegressionModel(RegressionModel):
    def __init__(self, arity=1, network_structure=(1,),
                 activation_function="tanh", error_metric="rmse",
                 optimizer_type="nadam", learning_rate=None,
                 loss_function="mse", nb_epoch=1, batch_size=100,
                 early_stopping=False, weight_init_method="normal",
                 graphical_verbose=False, validation_split=0.1,
                 dropout=False, dropout_input_layer_fraction=0.2,
                 dropout_hidden_layer_fraction=0.5, batch_normalization=False,
                 verbose=False, weight_decay=False,
                 weight_decay_parameter=0.001, **kwargs):
        """
        A class to construct arbitrary artificial neural networks using the
        Keras library (http://keras.io/). The module supports
        state-of-the-art technologies for optimization and regularization
        of ANNs.

        :param network_structure: A tuple which specifies the number of neurons for each layer
        :param activation_function: Activation function used, cf. http://keras.io/activations/
        :param error_metric: Error metric
        :param optimizer_type: Specifies the optimization method used
        :param loss_function: Loss function (given by Keras or custom loss functions), cf. http://keras.io/objectives/
        :param nb_epoch: Number of training epochs
        :param batch_size: Batch size used for mini-batch learning
        :param early_stopping: If set True, training will be interrupted when the loss isn't decaying anymore
        :param weight_init_method: Method of weight initialization, e.g. normal, glorot_normal, uniform
        :param arity: Input dimension
        :param verbose: Verbose mode, verbose=1 shows progress bar logging, verbose=2 shows console logging
        :param graphical_verbose: If True, show graphical output
        :param dropout: Use dropout layers for regularization
        :param dropout_input_layer_fraction: Fraction of input units to drop
        :param dropout_hidden_layer_fraction: Fraction of hidden layer units to drop
        :param batch_normalization: Activate batch normalization
        :param weight_decay: Activate weight decay regularization method
        :param weight_decay_parameter: Sets the weight decay regularization parameter
        :param kwargs:
        """
        super(RegressionModel, self).__init__()
        # self.logger.info("Compiling ANN...")
        self.__dict__.update(locals())

        # Initialize ANN structure.
        self.__model = Sequential()
        self.input_layer_params = {"input_shape": (self.arity,),
                                   "activation": self.activation_function,
                                   "output_dim": self.network_structure[0],
                                   "init": self.weight_init_method}
        self.hidden_layer_params = {"activation": self.activation_function,
                                    "init": self.weight_init_method}
        if self.weight_decay:
            self.hidden_layer_params["W_regularizer"] = l2(weight_decay_parameter)
        self.output_layer_params = {"activation": "linear",
                                    "init": self.weight_init_method}
        self.create_input_layer()
        # Stack up the remaining layers.
        self.create_hidden_layers()
        self.create_output_layer()

        # Compile the neural network.
        self.__model.compile(optimizer=RMSprop(lr=0.001),
                             loss=self.loss_function)
        # self.logger.info("Compilation completed...")
        self.func = self.__model.predict

    def add_layer(self, num_nodes, layer_params, dropout=False):
        self.__model.add(Dense(num_nodes, **layer_params))
        if dropout:
            self.__model.add(Dropout(self.dropout_hidden_layer_fraction))

    def create_input_layer(self):
        if self.dropout:
            self.__model.add(Dropout(self.dropout_input_layer_fraction,
                                     input_shape=(self.arity,)))
            del self.input_layer_params["input_shape"]
        self.__model.add(Dense(**self.input_layer_params))
        if self.batch_normalization:
            self.__model.add(BatchNormalization())

    def create_hidden_layers(self):
        for num_nodes in self.network_structure[1:-1]:
            self.add_layer(num_nodes, self.hidden_layer_params,
                           dropout=self.dropout)
            if self.batch_normalization:
                self.__model.add(BatchNormalization())

    def create_output_layer(self):
        self.add_layer(self.network_structure[-1], self.output_layer_params)
        if self.batch_normalization:
            self.__model.add(BatchNormalization())

    @property
    def weights(self):
        return self.__model.get_weights()

    # @doc_inherit
    def fit(self, xfit, yfit):
        self.hist = self.__model.fit(xfit, yfit, nb_epoch=self.nb_epoch,
                                     batch_size=self.batch_size,
                                     verbose=self.verbose,
                                     validation_split=self.validation_split)
        # , callbacks=self.callbacks)
        return self

    def __getstate__(self):
        """
        Function to make KerasRegressionModel picklable. The weights, the
        architecture, as well as the ANN compilation settings, are stored
        in a dictionary.

        :return: The dictionary containing the ANN architecture in json
            format, the weights and the ANN compilation settings
        """
        state = copy(self.__dict__)
        del state["func"]
        # del state["logger"]
        # del state["_KerasRegressionModel__model"]
        del state["hist"]
        return dict(json_model=self.__model.to_json(),
                    weights=self.__model.get_weights(),
                    config=state)

    def __setstate__(self, d):
        """
        Function to make KerasRegressionModel picklable.

        :param d:
        :return:
        """
        self.__dict__ = d["config"]
        self.__model = model_from_json(d["json_model"])
        self.__model.set_weights(d["weights"])
        self.func = self.__model.predict

    def print_summary(self):
        """
        Print a summary of the neural network, including the architecture
        and compilation settings.
        """
        self.__model.summary()

dataset = pd.read_csv('weight-height.csv')
y = dataset['Weight']

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

model = Sequential()
model.add(Dense(units=1, input_shape=(1,)))
model.compile(loss='mean_squared_error',
              optimizer=Adam(learning_rate=0.000001))
model.fit(X, y, epochs=20)

W, B = model.get_weights()
print(W)
print(B)

# Zero out the single weight and bias, then write them back.
W[0, 0] = 0.0
B[0] = 0.0
model.set_weights((W, B))
model.get_weights()

class ANN(object):
    def __init__(self, input_size, num_hidden_layers, hidden_layer_sizes,
                 output_size, epochs=50, batch_size=1, fit_verbose=2,
                 variables=None):
        self.input_size = input_size
        self.num_hidden_layers = num_hidden_layers
        self.hidden_layer_sizes = hidden_layer_sizes
        self.output_size = output_size
        self.epochs = epochs
        self.batch_size = batch_size
        self.verbose = fit_verbose
        self.build_model()

    def build_model(self):
        self.model = Sequential()
        self.model.add(Dense(self.hidden_layer_sizes[0],
                             input_shape=(self.input_size,),
                             activation='relu'))
        for i in range(1, self.num_hidden_layers - 1):
            self.model.add(Dense(self.hidden_layer_sizes[i], activation='relu'))
        self.model.add(Dense(self.hidden_layer_sizes[-1], activation='relu'))
        self.model.add(Dense(self.output_size, activation='sigmoid'))
        self.model.compile(loss='mean_squared_error', optimizer='adam')

    def predict(self, data):
        """
        Runs the data in the data parameter through the network and returns
        a list of predicted values.

        data - a matrix of data (explanatory variables) to be sent through
               the network
        """
        return self.model.predict(data)

    def get_weights(self):
        """Returns the weights for each layer in the network (list of arrays)."""
        return self.model.get_weights()

    def set_weights(self, weights):
        """Sets the weights of the network."""
        self.model.set_weights(weights)

    def train(self, train_x, train_y, optimizer='adam'):
        """
        Trains the model using the Adam optimization algorithm (more to be
        implemented later). Creates a `history` attribute.

        train_x - a matrix of explanatory variables for training
        train_y - a matrix of dependent variables to train on
        optimizer - optimization algorithm (Adam is the only one implemented)
        """
        self.history = self.model.fit(train_x, train_y, epochs=self.epochs,
                                      batch_size=self.batch_size,
                                      verbose=self.verbose, shuffle=False)

def evaluate_fold(fold_ix, use_pretrained_embedding, bi_directional, num_rnns,
                  merge_mode, hidden_size):
    if use_pretrained_embedding:
        embedding_matrix = get_embedding_matrix(unique_words, generator,
                                                max_features, init='uniform',
                                                unit_length=False)
        embedding_layer = Embedding(max_features, EMBEDDING_DIM,
                                    weights=[embedding_matrix],
                                    input_length=maxlen, trainable=True,
                                    mask_zero=True)
    else:
        # Initialize unfound words with all 0's.
        embedding_layer = Embedding(max_features, embedding_size,
                                    input_length=maxlen, trainable=True,
                                    mask_zero=True)

    if bi_directional:
        rnn_layer_fact = lambda: Bidirectional(
            GRU(hidden_size, return_sequences=True, consume_less="cpu"),
            merge_mode=merge_mode)
    else:
        rnn_layer_fact = lambda: GRU(hidden_size, return_sequences=True,
                                     consume_less="cpu")

    model = Sequential()
    model.add(embedding_layer)
    for i in range(num_rnns):
        model.add(rnn_layer_fact())
    model.add(TimeDistributedDense(out_size))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  sample_weight_mode="temporal")

    X_train, y_train, train_ys_by_tag, seq_len_train = fold2training_data[fold_ix]
    X_dev, y_dev, dev_ys_by_tag, seq_len_dev = fold2dev_data[fold_ix]
    X_test, y_test, test_ys_by_tag, seq_len_test = fold2test_data[fold_ix]

    # Init loop vars for manual early stopping on the dev-set F1 score.
    f1_scores = [-1]
    num_since_best_score = 0
    patience = 3
    best_weights = None
    for i in range(30):
        print("{ts}: Epoch={epoch}".format(ts=get_ts(), epoch=i))
        epochs = 1  # epochs per training iteration
        results = model.fit(X_train, y_train, batch_size=batch_size,
                            nb_epoch=epochs, validation_split=0.0, verbose=0)
        micro_metrics, _ = score_predictions(model, X_dev, dev_ys_by_tag,
                                             seq_len_dev)
        print(micro_metrics)
        f1_score = micro_metrics.f1_score
        best_f1_score = max(f1_scores)
        if f1_score <= best_f1_score:
            num_since_best_score += 1
        else:
            # Score improved; snapshot the weights.
            num_since_best_score = 0
            best_weights = model.get_weights()
        f1_scores.append(f1_score)
        if num_since_best_score >= patience:
            break

    # Load the best weights seen during training.
    model.set_weights(best_weights)
    train_predictions_by_tag = get_predictions(model, X_train, train_ys_by_tag,
                                               seq_len_train)
    test_predictions_by_tag = get_predictions(model, X_test, test_ys_by_tag,
                                              seq_len_test)
    return (train_predictions_by_tag, test_predictions_by_tag,
            train_ys_by_tag, test_ys_by_tag)

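# Newer Keras releases package this snapshot-and-restore pattern into the
# built-in callback, which tracks the best epoch and rolls weights back at
# the end of training (availability depends on the Keras version):
from keras.callbacks import EarlyStopping

best_weights_stopper = EarlyStopping(monitor='val_loss', patience=3,
                                     restore_best_weights=True)
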
def fit_lstm(self, train):
    # Reshape training into [samples, timesteps, features];
    # timesteps is 1 as there is 1 sample per day.
    X, y = train[:, 0:self.n_lag * self.number_of_indicators], \
        train[:, self.n_lag * self.number_of_indicators:]
    # Design the network.
    # Source: https://machinelearningmastery.com/how-to-develop-lstm-models-for-time-series-forecasting/
    model = Sequential()
    if self.model_type == "vanilla":
        X = X.reshape(X.shape[0], self.n_lag, self.number_of_indicators)
        model.add(LSTM(self.number_of_indicators,
                       batch_input_shape=(self.n_batch, self.n_lag,
                                          self.number_of_indicators),
                       stateful=True))
        model.add(Dense(y.shape[1]))
    elif self.model_type == "stacked":
        # 2 hidden layers, but can be modified.
        X = X.reshape(X.shape[0], self.n_lag, self.number_of_indicators)
        model.add(LSTM(self.number_of_indicators,
                       batch_input_shape=(self.n_batch, self.n_lag,
                                          self.number_of_indicators),
                       return_sequences=True,
                       stateful=True))
        model.add(LSTM(int(self.number_of_indicators * 2 / 3 + y.shape[1])))
        model.add(Dense(y.shape[1]))
    elif self.model_type == "bi":
        X = X.reshape(X.shape[0], self.n_lag, self.number_of_indicators)
        model.add(Bidirectional(
            LSTM(self.number_of_indicators, stateful=True),
            batch_input_shape=(self.n_batch, self.n_lag,
                               self.number_of_indicators)))
        model.add(Dense(y.shape[1]))
    elif self.model_type == "cnn":
        X = X.reshape(X.shape[0], 1, self.n_lag, self.number_of_indicators)
        model.add(TimeDistributed(
            Conv1D(filters=64, kernel_size=1),
            batch_input_shape=(self.n_batch, None, self.n_lag,
                               self.number_of_indicators)))
        model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
        model.add(TimeDistributed(Flatten()))
        model.add(LSTM(self.number_of_indicators))
        model.add(Dense(y.shape[1]))
    elif self.model_type == "conv":
        X = X.reshape(X.shape[0], 1, 1, self.n_lag, self.number_of_indicators)
        model.add(ConvLSTM2D(filters=64, kernel_size=(1, 2),
                             batch_input_shape=(self.n_batch, 1, 1, self.n_lag,
                                                self.number_of_indicators)))
        model.add(Flatten())
        model.add(Dense(y.shape[1]))
    else:
        raise ValueError("self.model_type is not any of the specified")

    model.compile(loss='mean_squared_error', optimizer='adam')
    print("Model Type: ", self.model_type)
    print("train X size:", len(X), " shape:", X.shape,
          "train y size:", len(y), " shape: ", y.shape)

    # Fit the network, resetting the LSTM state after every epoch.
    print("Training model with batch size", self.n_batch)
    model.summary()
    for i in range(self.n_epochs):
        model.fit(X, y, epochs=1, batch_size=self.n_batch, verbose=0,
                  shuffle=False)
        model.reset_states()
    # self.save_plot_model(model, note="fullbatch")

    # Source: https://machinelearningmastery.com/use-different-batch-sizes-training-predicting-python-keras/
    # Create a new model with batch size 1 and give it the trained weights;
    # this allows the model to predict 1 step at a time instead of batches.
    n_batch = 1
    new_model = Sequential()
    if self.model_type == "vanilla":
        new_model.add(LSTM(self.number_of_indicators,
                           batch_input_shape=(n_batch, self.n_lag,
                                              self.number_of_indicators),
                           stateful=True))
        new_model.add(Dense(y.shape[1]))
    elif self.model_type == "stacked":
        # 2 hidden layers, but can be modified.
        new_model.add(LSTM(self.number_of_indicators,
                           batch_input_shape=(n_batch, self.n_lag,
                                              self.number_of_indicators),
                           return_sequences=True,
                           stateful=True))
        new_model.add(LSTM(int(self.number_of_indicators * 2 / 3 + y.shape[1])))
        new_model.add(Dense(y.shape[1]))
    elif self.model_type == "bi":
        new_model.add(Bidirectional(
            LSTM(self.number_of_indicators, stateful=True),
            batch_input_shape=(n_batch, self.n_lag,
                               self.number_of_indicators)))
        new_model.add(Dense(y.shape[1]))
    elif self.model_type == "cnn":
        new_model.add(TimeDistributed(
            Conv1D(filters=64, kernel_size=1),
            batch_input_shape=(n_batch, None, self.n_lag,
                               self.number_of_indicators)))
        new_model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
        new_model.add(TimeDistributed(Flatten()))
        new_model.add(LSTM(self.number_of_indicators))
        new_model.add(Dense(y.shape[1]))
    elif self.model_type == "conv":
        new_model.add(ConvLSTM2D(filters=64, kernel_size=(1, 2),
                                 batch_input_shape=(n_batch, 1, 1, self.n_lag,
                                                    self.number_of_indicators)))
        new_model.add(Flatten())
        new_model.add(Dense(y.shape[1]))
    else:
        raise ValueError("self.model_type is not any of the specified")

    new_model.set_weights(model.get_weights())
    print("\n\nNew model with batch size 1 for prediction")
    new_model.summary()
    return new_model

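# The hand-off above works because the stateful batch size is baked into the
# input spec, not the weights: both architectures yield identically shaped
# weight lists. The same trick on a toy stateful LSTM (shapes illustrative):
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense

def build_toy_stateful(batch_size):
    m = Sequential()
    m.add(LSTM(8, batch_input_shape=(batch_size, 4, 2), stateful=True))
    m.add(Dense(1))
    m.compile(loss='mse', optimizer='adam')
    return m

toy_train_model = build_toy_stateful(batch_size=16)  # fast batched training
toy_pred_model = build_toy_stateful(batch_size=1)    # one series at a time
toy_pred_model.set_weights(toy_train_model.get_weights())
assert toy_pred_model.predict(np.zeros((1, 4, 2))).shape == (1, 1)
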
nb_actions = env.action_space.n

model = Sequential()
# Drop the window dimension added by the agent (window_length=1) so the
# convolutions see (height, width, channels) inputs; a leading Flatten here
# would destroy the spatial structure the Conv2D layers need.
model.add(Reshape(env.observation_space.shape,
                  input_shape=(1,) + env.observation_space.shape))
model.add(Convolution2D(32, 8, 8, subsample=(4, 4)))
model.add(Activation('relu'))
model.add(Convolution2D(64, 4, 4, subsample=(2, 2)))
model.add(Activation('relu'))
model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.compile(loss='mse', optimizer=Adam(lr=0.00001))

policy = EpsGreedyQPolicy()
memory = SequentialMemory(limit=50000, window_length=1)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
               nb_steps_warmup=50, target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=5000, visualize=False, callbacks=[tensorboard],

        # One-hot encode the chosen action.
        if action == 0:
            action = np.array([1, 0, 0, 0])
        elif action == 1:
            action = np.array([0, 1, 0, 0])
        elif action == 2:
            action = np.array([0, 0, 1, 0])
        else:
            action = np.array([0, 0, 0, 1])

        memory.append([state, action, reward, next_state, done])
        if len(memory) > batch_size:
            train()
        state = next_state

    # Sync the target network with the online network after each episode.
    model_two.set_weights(model_one.get_weights())
    recent_scores.append(score)
    all_scores.append([score, epsilon, len(memory)])

    if np.mean(recent_scores) >= 200:
        model_one.save('model_success.h5')
        model_one.save_weights('weights_success.h5')
        break

    if e % 100 == 0:
        print('episode:', e, ' current epsilon:', epsilon,
              ' current rolling score', np.mean(recent_scores))

all_scores_df = pd.DataFrame(all_scores)
all_scores_df.to_csv('scores_success.csv')

average_over=average_over, step_reset=1) noise = 0.30 _epsilons_false = np.random.normal(loc=1., scale=noise, size=n) * _epsilons _etas_false = np.random.normal(loc=1., scale=noise, size=n) * _etas _deltas_false = np.random.normal( loc=1., scale=noise, size=qubitring_ndrives(n)) * _deltas guess_w_b = lambda: qubitring_perfect_pauli_weights( n, _epsilons_false, _etas_false, _deltas_false, noise=guess_distance) starts = [] best = float('inf') for i in range(guess_size): print('\rbest:', best, end='', flush=True) w, b = guess_w_b() model.set_weights([w, b]) start = model.fit_generator(gen_guess, verbose=0, steps_per_epoch=1, epochs=1, callbacks=[]).history starts.append(start) if start['mean_squared_error'][-1] < best: best = start['mean_squared_error'][-1] best_start = start best_ws = [w, b] print() model.set_weights(best_ws) hists.append(best_start) # MODEL 0 main
class DeepQLearner: MINIMUM_EXPERIENCE = 1000 def __init__(self, input_dim, layers, batch_size, total_memory, terminal_fn, eps=0.5, eps_dt=-10 * 10**-5, discount=0.9, target_freeze_duration=2500, rgb_array=False): self.total_memory = total_memory self.batch_size = batch_size self.terminal_fn = terminal_fn if not rgb_array: # experience has s0, s1, action, reward, done? and t self.experience = np.zeros((input_dim * 2 + 4, total_memory)) self.exp_ct = 0 self.exp_index = 0 self.model = Sequential() self.target_model = Sequential() self.input_dim = input_dim self.actions = layers[-1] self.eps = eps self.eps_dt = eps_dt self.discount = discount self.target_freeze_duration = target_freeze_duration self.build_model(input_dim, layers, rgb_array) rms = keras.optimizers.RMSprop(lr=0.003, rho=0.9, epsilon=1e-08, decay=0.0) self.model.compile(optimizer=rms, loss='mse') self.target_model.compile(optimizer=rms, loss='mse') def build_model(self, input_dim, layers, rgb_array): first, layers, end = layers[0], layers[1:-1], layers[-1] if not rgb_array: first_layer = Dense(first, batch_input_shape=(None, input_dim), init='uniform', activation='relu') self.model.add(first_layer) self.target_model.add(first_layer) else: first_conv = Convolution2D(128, 3, 3, border_mode='same', input_shape=input_dim, dim_ordering='th') first_pool = MaxPooling2D(pool_size=(2, 2)) second_conv = Convolution2D(64, 3, 3, border_mode='same') second_pool = MaxPooling2D(pool_size=(2, 2)) flatten = Flatten() conv_layers = [ first_conv, first_pool, second_conv, second_pool, flatten ] for layer in conv_layers: self.model.add(layer) self.target_model.add(layer) for layer in layers: l = Dense(layer, activation='relu', init='uniform') self.model.add(l) self.target_model.add(l) # end with linear to sum to Q-value end_layer = Dense(end, activation='linear', init='uniform') self.model.add(end_layer) self.target_model.add(end_layer) def learn(self, s0, s1, reward, action, done, t): self.store_experience(s0, s1, reward, action, done, t) if self.exp_ct < self.total_memory: self.exp_ct += 1 self.exp_index += 1 self.exp_index %= self.total_memory if self.exp_index % self.target_freeze_duration == 0: print("Copied to target network") self.target_model.set_weights(self.model.get_weights()) if self.exp_ct > DeepQLearner.MINIMUM_EXPERIENCE: self.experience_replay() def store_experience(self, s0, s1, reward, action, done, t): memory = np.vstack((np.array(s0).reshape(len(s0), 1), np.array(s1).reshape(len(s1), 1), reward, action, done, t)) self.experience[:, self.exp_index] = memory.flatten() def experience_replay(self): subset_index = np.random.choice(self.exp_ct, self.batch_size, replace=False) # this hideous mess encodes an experience subset = self.experience[:, subset_index] s0 = subset[:self.input_dim, :] s1 = subset[self.input_dim:self.input_dim * 2, :] r = subset[self.input_dim * 2, :] action = subset[self.input_dim * 2 + 1, :] done = subset[self.input_dim * 2 + 2, :] t = subset[self.input_dim * 2 + 3, :] s0_q_values = self.model.predict(s0.T) s1_q_values = self.future_fn(s1) # update Q values for k, q in enumerate(s0_q_values): a = int(action[k]) if bool(done[k]): q[a] = self.terminal_fn(r[k], t[k]) else: q[a] = r[k] + self.discount * s1_q_values[k] loss = self.model.train_on_batch(s0.T, s0_q_values) def future_fn(self, s1): return np.amax(self.target_model.predict(s1.T), axis=1) def act(self, observation): observation = observation.reshape(1, *observation.shape) q = self.model.predict(observation) if random.random() < self.eps: action = 
math.floor(random.random() * self.actions)
        else:
            action = np.argmax(q)
        self.eps = max(0, self.eps + self.eps_dt)
        return action
model.get_config()
# e.g.
config = model.get_config()
model.from_config(config=config)
# or
model = Sequential.from_config(config=config)

# 3. get_layer: fetch a layer object by name or by index
layer2 = model.get_layer('dense_2')
print(layer2)

# 4. get the model's weights
weights = model.get_weights()

# 5. set the model's weights
model.set_weights(weights)

# 6. to_json: returns the model as a JSON string (architecture only, no weights)
from keras.models import model_from_json, model_from_yaml
json_model_string = model.to_json()
model = model_from_json(json_string=json_model_string)

# 7. to_yaml
yaml_string = model.to_yaml()
model = model_from_yaml(yaml_string=yaml_string)

# 8. save_weights: saves the weights to the given path; use the .h5 extension
model.save_weights('saved/basic_model_weights.h5')

# 9. load_weights: loads weights from an HDF5 file into the current model.
# By default the architecture must be unchanged. To load the weights into a
# different model that shares some layers, set by_name=True; only layers with
# matching names will receive weights.
model.load_weights('saved/basic_model_weights.h5')
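# The snippet covers architecture (to_json/to_yaml) and weights
# (save_weights/load_weights) separately; for completeness, model.save and
# load_model bundle architecture, weights, and optimizer state into one file.
from keras.models import load_model

model.save('saved/basic_model.h5')         # architecture + weights + optimizer state
model = load_model('saved/basic_model.h5')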
class nnet: def __init__(self, layer_vector, learning_rate=0.1, decay_value=1e-6, momentum_value=0.9, nest=True): # Layer vector is a vector of node numbers. For a network with 4 input nodes, 10 hidden nodes and 1 output node # layer_vector should read [4,10,1] self.layer_vector = layer_vector # These will be the network parameters set after training self.parameters = None # The normalization parameters set by the training data self.normalization = [] self.model = Sequential() # Now we specify the model from layer_vector self.model.add( Dense(layer_vector[1], input_dim=layer_vector[0], init='uniform', bias=True)) self.num_additions = 0 for i in range(1, len(layer_vector) - 1): # This "layer" object applies the activation from the output of the previous self.model.add(Activation('tanh')) # Adding the next layer self.model.add( Dense(layer_vector[i + 1], init='uniform', bias=True)) self.num_additions += 2 self.model.add(Activation('tanh')) self.output_function = K.function( [self.model.layers[0].input], [self.model.layers[self.num_additions + 1].output]) # Stochastic Gradient Descent sgd = SGD(lr=learning_rate, decay=decay_value, momentum=momentum_value, nesterov=nest) # This compiles the network self.model.compile(loss='mean_squared_error', optimizer=sgd, metrics=['accuracy']) def get_parameters(self): return self.parameters # This function sets the parameters from a given list def set_parameters(self, param): self.model.set_weights(param) self.parameters = param def get_normalization(self): return self.normalization def set_normalization(self, normal): self.normalization = normal def find_normalization_parameters(self, data): # Scaling is as follows; Do standardization on all the variables; x' = (x - mu)/sigma # Then apply the sigmoid function; x'' = 1/(1 + e^(-x)) # Then search for nearest neighbors and scale by <RMS> self.normalization = [] means = np.asarray(data).mean(axis=0) sigmas = np.asarray(data).std(axis=0) # Now make a copy and apply the standardization temp_data = np.copy(data) for i in range(len(temp_data[0])): for j in range(len(temp_data)): #print temp_data[j][i] temp_data[j][i] = (temp_data[j][i] - means[i]) / sigmas[i] # Now apply the sigmoid function for i in range(len(temp_data[0])): for j in range(len(temp_data)): #print temp_data[j][i] temp_data[j][i] = 1 / (1 + np.exp(-temp_data[j][i])) total_num = 1000 # Now find the nearest neighbor <RMS> squares = np.zeros(len(temp_data[0])) num_points = np.random.randint(0, len(temp_data), total_num) temp_data_2 = [ temp_data[num_points[i]] for i in range(len(num_points)) ] for i in range(total_num): temp_neighbor = 0 temp_distance = 10e6 temp_point = temp_data_2[i] for j in range(len(temp_data_2)): if i != j: temp_dist = 0.0 for l in range(len(temp_point)): temp_dist += np.power( (temp_point[l] - temp_data_2[j][l]), 2.0) if np.sqrt(temp_dist) <= temp_distance: temp_distance = np.sqrt(temp_dist) temp_neighbor = j #print "Found nearest neighbor for point ", i, " at point ", j for l in range(len(squares)): squares[l] += (np.power( (temp_data_2[i][l] - temp_data_2[temp_neighbor][l]), 2.0) / total_num) max_square = squares[0] for i in range(len(squares)): if (squares[i] >= max_square): max_square = squares[i] squares = squares / max_square # Now apply the nearest neighbor <RMS> to find a new mean for i in range(len(temp_data[0])): for j in range(len(temp_data)): #print temp_data[j][i] temp_data[j][i] = temp_data[j][i] / np.sqrt(squares[i]) num_points = np.random.randint(0, len(temp_data), total_num) temp_data_2 = [ 
temp_data[num_points[i]] for i in range(len(num_points)) ] new_means = np.asarray(temp_data_2).mean(axis=0) # Now save these parameters for i in range(len(squares)): self.normalization.append( [means[i], sigmas[i], np.sqrt(squares[i]), new_means[i]]) # Trying to do this with a list comprehension is tricky def normalize_data(self, data): for i in range(len(self.normalization)): for j in range(len(data)): #print data[j][i]() data[j][i] = ( (1 / (1 + np.exp(-((data[j][i] - self.normalization[i][0]) / self.normalization[i][1])))) / self.normalization[i][2]) - self.normalization[i][3] #data[j][i] = (1 / (1 + np.exp(-( data[j][i] - self.normalization[i][0] ) / self.normalization[i][1]))) / self.normalization[i][2]# / self.normalization[i][2] return data # This function trains the network on specified training data def train_network(self, training_data, training_answer, num_epochs=1, batch=256): train_data = np.copy(training_data) # Anytime we are training a network, we must renormalize according to the data self.find_normalization_parameters(training_data) train_data = self.normalize_data(train_data) # The training session self.model.fit(train_data, training_answer, nb_epoch=num_epochs, batch_size=batch) # Saves the weights from training to the parameters attribute self.parameters = self.model.get_weights() # This function evaluates test data against the trained network def evaluate_network(self, testing_data, testing_answer, score_output=True): test_data = np.copy(testing_data) # We don't want to normalize the actual testing data, only a copy of it test_data = self.normalize_data(test_data) if (score_output == True): score = self.model.evaluate(test_data, testing_answer, batch_size=100) # Prints a score for the network based on the training data print('Score: %s' % (score)) activations = self.output_function([test_data]) return [[activations[0][i][0], testing_answer[i]] for i in range(len(testing_answer))] # This takes the network output and splits them into separate signal and background variables def split_binary_results(self, results): signal = [] background = [] for i in range(len(results)): if (results[i][1] == -1.0): background.append(results[i][0]) else: signal.append(results[i][0]) return signal, background def network_output_jsd(self, signal, background, neighbors=3): new_signal = [[signal[i]] for i in range(len(signal))] new_background = [[background[i]] for i in range(len(background))] new_ans1 = [[1.0] for s in range(len(signal))] new_ans2 = [[-1.0] for s in range(len(background))] new_ans = new_ans1 + new_ans2 new_data = new_signal + new_background return mi(new_data, new_ans, k=neighbors) # We plot three things, a histogram of the network output and two ROC curves (and accept/reject and accept/accept for signal and background) def plot_network_output(self, results, save_cuts=True, symmetric=True): keras_signal, keras_background = self.split_binary_results(results) keras_signal_acc, keras_signal_rej, keras_background_acc, keras_background_rej = acceptance_rejection_cuts( keras_signal, keras_background, symmetric_signal=symmetric) keras_sig_acc_back_rej_AUC = self.area_under_ROC_curve( keras_signal_acc, keras_background_rej) if (save_cuts == True): cut_values = [[keras_signal_acc[i], keras_background_acc[i]] for i in range(len(keras_signal_acc))] with open('cut_values.csv', 'w') as cut_file: writer = csv.writer(cut_file) writer.writerows(cut_values) print('AUC: Background Rej. vs. 
Signal Acc.;') print('Keras/Tensorflow: %s' % (keras_sig_acc_back_rej_AUC)) # Now plotting our results plt.figure(1) plt.hist(keras_signal, 100, normed='True', alpha=0.5, facecolor='blue', label='Signal', hatch="/") plt.hist(keras_background, 100, normed='True', alpha=0.5, facecolor='red', label='Background', hatch="/") plt.legend(loc='upper right') plt.title('Keras/TensorFlow test data histogram') plt.savefig('nn_hists.png') plt.figure(2) plt.plot(keras_signal_acc, keras_background_acc, linestyle='None') plt.scatter(keras_signal_acc, keras_background_acc, color='k', label='keras/tensorflow') plt.xlabel('Signal Acceptance') plt.ylabel('Background Acceptance') plt.yscale('log') plt.xscale('log') plt.legend(loc='upper left', shadow=True, title='Legend', fancybox=True) plt.grid(True) plt.title('Bkd Acc vs. Sig Acc') plt.ylim(0.0, 1.0) plt.xlim(0.0, 1.0) plt.figure(3) plt.plot(keras_signal_acc, keras_background_rej, linestyle='None') plt.scatter(keras_signal_acc, keras_background_rej, color='k', label='keras/tensorflow') plt.xlabel('Signal Acceptance') plt.ylabel('Background Rejection') plt.legend(loc='upper left', shadow=True, title='Legend', fancybox=True) plt.grid(True) plt.title('Bkd Rej vs. Sig Acc') plt.ylim(0.0, 1.0) plt.xlim(0.0, 1.0) plt.show() #This can get passed two CDF or inverse CDF values to calculate the total area under the receiver-operator-characteristic def area_under_ROC_curve(self, signal, background): num_data_points = len(signal) area_under_curve = 0.0 for i in range(num_data_points - 1): area_under_curve += background[i] * (signal[i + 1] - signal[i]) return area_under_curve # This saves the network parameters to a file which can be restored later def save_network_params_to_file(self, file): with open(file, 'w') as param_file: writer = csv.writer(param_file) writer.writerows(self.parameters) # This saves the network output along with the correspond value in the variable space def save_network_score_to_file(self, data, results, file): # Need to make a new list with [ [ data, result[0] ] ] and then write that to file output_list = list() for i in range(len(data)): output_list.append(np.concatenate((data[i], [results[i][0]]))) with open(file, 'w') as ntuple_file: writer = csv.writer(ntuple_file) writer.writerows(output_list) print('Wrote data to file; %s' % (file))
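# area_under_ROC_curve above integrates with a left-rectangle rule; NumPy's
# trapezoid rule gives a quick cross-check, with `signal` and `background`
# being the same acceptance/rejection arrays passed to that method.
import numpy as np

auc = np.trapz(background, x=signal)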
def start(self):
    """ Run the Aggregator. """
    import time

    LOGGER.info('Waiting on quorum')
    self.wait_for_workers_to_join()
    LOGGER.info('Quorum of workers found')
    LOGGER.info('Starting training')

    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu',
                     input_shape=(28, 28, 1)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))
    model.compile(loss=losses.categorical_crossentropy,
                  optimizer=optimizers.Adam(lr=self.learning_rate),
                  metrics=['accuracy'])

    LOGGER.info('Distributing neural network architecture to participants')
    model_json = json.dumps(model.to_json())
    with self.comms:
        self.comms.send({'model': model_json})

    for rnd in range(self.round):
        start = time.time()
        LOGGER.info("Round " + str(rnd))
        LOGGER.info('Asking participants to update model weights, '
                    'do local training and send back model update')
        model_weights = json.dumps({'weights': model.get_weights()},
                                   cls=NumpyEncoder)
        with self.comms:
            self.comms.send({'model_weights': model_weights})
        weight_updates = self.wait_for_workers_to_complete()

        LOGGER.info('Received model updates from all participants, '
                    'start updating the central model')
        list_updates = []
        for weight_update in weight_updates:
            rsp = self.get_result(weight_update)
            weight = json.loads(json.loads(rsp.content))['weight_update']
            weight = np.array([np.array(w) for w in weight])
            list_updates.append(weight)
        # federated averaging: the new central weights are the element-wise
        # mean of all participant updates
        model.set_weights(np.mean(np.array(list_updates), axis=0))

        [loss, accuracy] = model.evaluate(self.feature, self.label, verbose=0)
        end = time.time()
        LOGGER.info("Round %d, loss %f, val accuracy %f, time %f"
                    % (rnd, loss, accuracy, end - start))

    LOGGER.info('Finished %d rounds, done' % self.round)
    [_, accuracy] = model.evaluate(self.feature, self.label)
    LOGGER.info("Test accuracy %f" % accuracy)
    LOGGER.info('END')
    return {
        'model_weights': json.dumps({'weights': model.get_weights()},
                                    cls=NumpyEncoder)
    }
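# np.mean over a stacked object array only works when every layer's update
# stacks cleanly; a per-layer federated average is a more robust equivalent.
# A sketch, assuming `list_updates` holds per-participant weight lists as
# built in the aggregator loop above:
import numpy as np

def average_weights(list_updates):
    # average each weight tensor across participants, layer by layer
    return [np.mean([update[i] for update in list_updates], axis=0)
            for i in range(len(list_updates[0]))]

model.set_weights(average_weights(list_updates))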
# ax.plot(temp, temp_class, color='purple')
# plt.legend(['model', 'class', 'data'])
# plt.show()

# # Train / Test split
print("\n\n### Train / Test split")
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

params = model.get_weights()
# e.g. [array([[ 1.91573226]], dtype=float32), array([-2.83298182], dtype=float32)]
params = [np.zeros(w.shape) for w in params]  # [array([[ 0.]]), array([ 0.])]
model.set_weights(params)

print("weight", model.get_weights())
print("accuracy score {:0.3f}".format(accuracy_score(y, model.predict(X) > 0.5)))

print("fitting train set")
model.fit(X_train, y_train, epochs=25, verbose=0)
print("train accuracy score {:0.3f}".format(
    accuracy_score(y_train, model.predict(X_train) > 0.5)))
print("test accuracy score {:0.3f}".format(
    accuracy_score(y_test, model.predict(X_test) > 0.5)))
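# Zeroing the weights is one way to reset the model before fitting; to
# re-randomize instead, a fresh clone's initial weights can be copied over.
# A Keras 2-style sketch (clone_model re-runs the layer initializers):
from keras.models import clone_model

fresh = clone_model(model)
model.set_weights(fresh.get_weights())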
class GAN(object): def __init__(self): #Models self.D = None self.G = None self.OD = None self.DM = None self.AM = None #Config self.LR = 0.0001 self.steps = 1 def discriminator(self): if self.D: return self.D self.D = Sequential() #add Gaussian noise to prevent Discriminator overfitting self.D.add(GaussianNoise(0.2, input_shape=[256, 256, 3])) #256x256x3 Image self.D.add(Conv2D(filters=8, kernel_size=3, padding='same')) self.D.add(LeakyReLU(0.2)) self.D.add(Dropout(0.25)) self.D.add(AveragePooling2D()) #128x128x8 self.D.add(Conv2D(filters=16, kernel_size=3, padding='same')) self.D.add(BatchNormalization(momentum=0.7)) self.D.add(LeakyReLU(0.2)) self.D.add(Dropout(0.25)) self.D.add(AveragePooling2D()) #64x64x16 self.D.add(Conv2D(filters=32, kernel_size=3, padding='same')) self.D.add(BatchNormalization(momentum=0.7)) self.D.add(LeakyReLU(0.2)) self.D.add(Dropout(0.25)) self.D.add(AveragePooling2D()) #32x32x32 self.D.add(Conv2D(filters=64, kernel_size=3, padding='same')) self.D.add(BatchNormalization(momentum=0.7)) self.D.add(LeakyReLU(0.2)) self.D.add(Dropout(0.25)) self.D.add(AveragePooling2D()) #16x16x64 self.D.add(Conv2D(filters=128, kernel_size=3, padding='same')) self.D.add(BatchNormalization(momentum=0.7)) self.D.add(LeakyReLU(0.2)) self.D.add(Dropout(0.25)) self.D.add(AveragePooling2D()) #8x8x128 self.D.add(Conv2D(filters=256, kernel_size=3, padding='same')) self.D.add(BatchNormalization(momentum=0.7)) self.D.add(LeakyReLU(0.2)) self.D.add(Dropout(0.25)) self.D.add(AveragePooling2D()) #4x4x256 self.D.add(Flatten()) #256 self.D.add(Dense(128)) self.D.add(LeakyReLU(0.2)) self.D.add(Dense(1, activation='sigmoid')) return self.D def generator(self): if self.G: return self.G self.G = Sequential() self.G.add(Reshape(target_shape=[1, 1, 4096], input_shape=[4096])) #1x1x4096 self.G.add(Conv2DTranspose(filters=256, kernel_size=4)) self.G.add(Activation('relu')) #4x4x256 - kernel sized increased by 1 self.G.add(Conv2D(filters=256, kernel_size=4, padding='same')) self.G.add(BatchNormalization(momentum=0.7)) self.G.add(Activation('relu')) self.G.add(UpSampling2D()) #8x8x256 - kernel sized increased by 1 self.G.add(Conv2D(filters=128, kernel_size=4, padding='same')) self.G.add(BatchNormalization(momentum=0.7)) self.G.add(Activation('relu')) self.G.add(UpSampling2D()) #16x16x128 self.G.add(Conv2D(filters=64, kernel_size=3, padding='same')) self.G.add(BatchNormalization(momentum=0.7)) self.G.add(Activation('relu')) self.G.add(UpSampling2D()) #32x32x64 self.G.add(Conv2D(filters=32, kernel_size=3, padding='same')) self.G.add(BatchNormalization(momentum=0.7)) self.G.add(Activation('relu')) self.G.add(UpSampling2D()) #64x64x32 self.G.add(Conv2D(filters=16, kernel_size=3, padding='same')) self.G.add(BatchNormalization(momentum=0.7)) self.G.add(Activation('relu')) self.G.add(UpSampling2D()) #128x128x16 self.G.add(Conv2D(filters=8, kernel_size=3, padding='same')) self.G.add(Activation('relu')) self.G.add(UpSampling2D()) #256x256x8 self.G.add(Conv2D(filters=3, kernel_size=3, padding='same')) self.G.add(Activation('sigmoid')) return self.G def DisModel(self): if self.DM == None: self.DM = Sequential() self.DM.add(self.discriminator()) self.DM.compile(optimizer=Adam(lr=self.LR * (0.85**floor(self.steps / 10000))), loss='binary_crossentropy') return self.DM def AdModel(self): if self.AM == None: self.AM = Sequential() self.AM.add(self.generator()) self.AM.add(self.discriminator()) self.AM.compile(optimizer=Adam(lr=self.LR * (0.85**floor(self.steps / 10000))), loss='binary_crossentropy') return self.AM def 
sod(self):
        # snapshot the current discriminator weights
        self.OD = self.D.get_weights()

    def lod(self):
        # restore the snapshot taken by sod()
        self.D.set_weights(self.OD)
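# sod/lod give the discriminator a one-deep undo; a hypothetical usage
# sketch, where x_batch and y_batch stand in for a real mixed batch of
# images and labels:
import numpy as np

gan = GAN()
gan.sod()                                           # snapshot D
d_loss = gan.DisModel().train_on_batch(x_batch, y_batch)
if not np.isfinite(d_loss):
    gan.lod()                                       # roll back the bad step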
        Flush()
    ])
stopping_times.append(len(results.epoch))
print("stopped after", stopping_times[-1], "epochs")
# Divide the learning rate by 10
backend.set_value(adam.lr, 0.1 * backend.get_value(adam.lr))

# Now we will retrain the model again, keeping in mind the stopping times
# that we got from the early-stopping procedure
adam = Adam(lr=INITIAL_ADAM_LEARNING_RATE)
model.compile(loss='categorical_crossentropy', optimizer=adam,
              metrics=['accuracy'])
model.set_weights(saved_initial_weights)
for i in range(2):
    results = model.fit(np.concatenate((X_train, X_val, X_test)),
                        np.concatenate((y_train, y_val, y_test)),
                        batch_size=MINIBATCH_SIZE,
                        nb_epoch=stopping_times[i],
                        shuffle=True,
                        verbose=2,
                        callbacks=[Flush()])
    # Divide the learning rate by 10
    backend.set_value(adam.lr, 0.1 * backend.get_value(adam.lr))

# Save the model representation and weights to files
model_in_json = model.to_json()
    else:
        raise Exception("language feature dim error!")
    totalloss += loss[0]
    if (j + 1) % 100 == 0:
        print('epoch #{:03d}, batch #{:03d}, current avg loss = {:.3f}'.format(
            i + 1, j + 1, totalloss / (j + 1)))
        logfile.write('epoch #{:03d}, batch #{:03d}, current avg loss = {:.3f}\n'.format(
            i + 1, j + 1, totalloss / (j + 1)))
    if (i + 1) % 5 == 0:
        model.save_weights(model_file_name +
                           '_epoch_{:05d}_loss_{:.3f}.hdf5'.format(i + 1, totalloss / batchNum))
else:
    # cross-validation training
    dataSize = len(idList)
    setSize = dataSize // arg.cross_valid  # integer fold size
    crossvalidList = []
    for k in range(arg.cross_valid):
        # reset the weights to their initial values for every fold
        model.set_weights(weights_save)
        # cut the id list into train and validation folds
        if k == arg.cross_valid - 1:
            validIdList = idList[k * setSize:]
            trainIdList = idList[:k * setSize]
        else:
            validIdList = idList[k * setSize:(k + 1) * setSize]
            trainIdList = idList[:k * setSize] + idList[(k + 1) * setSize:]
        # accumulate the average total error of this cross-validation fold
        totalerror = 0
        for i in range(arg.epochs):
            # print('valid #{:02d}, epoch #{:03d}'.format(k + 1, i + 1))
            # logfile.write('valid #{:02d}, epoch #{:03d}\n'.format(k + 1, i + 1))
            # training
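# The per-fold reset only works if weights_save was captured once, before
# any gradient step; a minimal sketch of that pattern:

# right after model.compile(...), before any training:
weights_save = model.get_weights()

# ... later, at the top of every cross-validation fold:
model.set_weights(weights_save)  # restores the same starting point each time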
class Network(object): def __init__(self, parameters, modelName=None): self.parameters = parameters self.gpus = self.parameters.NUM_GPUS # Q-learning self.discount = self.parameters.DISCOUNT self.epsilon = self.parameters.EPSILON self.frameSkipRate = self.parameters.FRAME_SKIP_RATE if self.parameters.GAME_NAME == "Agar.io": self.gridSquaresPerFov = self.parameters.GRID_SQUARES_PER_FOV # CNN if self.parameters.CNN_REPR: # (KernelSize, stride, filterNum) self.kernel_1 = self.parameters.CNN_L1 self.kernel_2 = self.parameters.CNN_L2 self.kernel_3 = self.parameters.CNN_L3 if self.parameters.CNN_USE_L1: self.stateReprLen = self.parameters.CNN_INPUT_DIM_1 elif self.parameters.CNN_USE_L2: self.stateReprLen = self.parameters.CNN_INPUT_DIM_2 else: self.stateReprLen = self.parameters.CNN_INPUT_DIM_3 else: self.stateReprLen = self.parameters.STATE_REPR_LEN if parameters.SQUARE_ACTIONS: self.actions = createDiscreteActionsSquare( self.parameters.NUM_ACTIONS, self.parameters.ENABLE_SPLIT, self.parameters.ENABLE_EJECT) else: self.actions = createDiscreteActionsCircle( self.parameters.NUM_ACTIONS, self.parameters.ENABLE_SPLIT, self.parameters.ENABLE_EJECT) self.num_actions = len(self.actions) else: import gym env = gym.make(self.parameters.GAME_NAME) if self.parameters.CNN_REPR: pass else: self.stateReprLen = env.observation_space.shape[0] self.num_actions = env.action_space.n self.actions = list(range(self.num_actions)) # ANN self.learningRate = self.parameters.ALPHA self.optimizer = self.parameters.OPTIMIZER if self.parameters.ACTIVATION_FUNC_HIDDEN == "elu": self.activationFuncHidden = "linear" # keras.layers.ELU(alpha=eluAlpha) else: self.activationFuncHidden = self.parameters.ACTIVATION_FUNC_HIDDEN self.activationFuncLSTM = self.parameters.ACTIVATION_FUNC_LSTM self.activationFuncOutput = self.parameters.ACTIVATION_FUNC_OUTPUT self.layers = parameters.Q_LAYERS if self.parameters.USE_ACTION_AS_INPUT: inputDim = self.stateReprLen + 4 outputDim = 1 else: inputDim = self.stateReprLen outputDim = self.num_actions if self.parameters.EXP_REPLAY_ENABLED: input_shape_lstm = (self.parameters.MEMORY_TRACE_LEN, inputDim) stateful_training = False self.batch_len = self.parameters.MEMORY_BATCH_LEN else: input_shape_lstm = (1, inputDim) stateful_training = True self.batch_len = 1 if self.parameters.INITIALIZER == "glorot_uniform": initializer = keras.initializers.glorot_uniform() elif self.parameters.INITIALIZER == "glorot_normal": initializer = keras.initializers.glorot_normal() else: weight_initializer_range = math.sqrt( 6 / (self.stateReprLen + self.num_actions)) initializer = keras.initializers.RandomUniform( minval=-weight_initializer_range, maxval=weight_initializer_range, seed=None) # CNN if self.parameters.CNN_REPR: if self.parameters.CNN_P_REPR: # RGB if self.parameters.CNN_P_RGB: channels = 3 # GrayScale else: channels = 1 if self.parameters.CNN_LAST_GRID: channels = channels * 2 self.input = Input(shape=(self.stateReprLen, self.stateReprLen, channels)) conv = self.input if self.parameters.CNN_USE_L1: conv = Conv2D(self.kernel_1[2], kernel_size=(self.kernel_1[0], self.kernel_1[0]), strides=(self.kernel_1[1], self.kernel_1[1]), activation='relu', data_format='channels_last')(conv) if self.parameters.CNN_USE_L2: conv = Conv2D(self.kernel_2[2], kernel_size=(self.kernel_2[0], self.kernel_2[0]), strides=(self.kernel_2[1], self.kernel_2[1]), activation='relu', data_format='channels_last')(conv) if self.parameters.CNN_USE_L3: conv = Conv2D(self.kernel_3[2], kernel_size=(self.kernel_3[0], self.kernel_3[0]), 
strides=(self.kernel_3[1], self.kernel_3[1]), activation='relu', data_format='channels_last')(conv) self.valueNetwork = Flatten()(conv) # Not pixel input else: # Vision grid merging self.input = Input(shape=(self.parameters.NUM_OF_GRIDS, self.stateReprLen, self.stateReprLen)) conv = self.input if self.parameters.CNN_USE_L1: conv = Conv2D(self.kernel_1[2], kernel_size=(self.kernel_1[0], self.kernel_1[0]), strides=(self.kernel_1[1], self.kernel_1[1]), activation='relu', data_format='channels_first')(conv) if self.parameters.CNN_USE_L2: conv = Conv2D(self.kernel_2[2], kernel_size=(self.kernel_2[0], self.kernel_2[0]), strides=(self.kernel_2[1], self.kernel_2[1]), activation='relu', data_format='channels_first')(conv) if self.parameters.CNN_USE_L3: conv = Conv2D(self.kernel_3[2], kernel_size=(self.kernel_3[0], self.kernel_3[0]), strides=(self.kernel_3[1], self.kernel_3[1]), activation='relu', data_format='channels_first')(conv) self.valueNetwork = Flatten()(conv) # Fully connected layers if self.parameters.NEURON_TYPE == "MLP": layerIterable = iter(self.layers) regularizer = keras.regularizers.l2(self.parameters.Q_WEIGHT_DECAY) if self.parameters.DROPOUT: constraint = maxnorm(self.parameters.MAXNORM) else: constraint = None if parameters.CNN_REPR: previousLayer = self.valueNetwork else: self.input = Input(shape=(inputDim, )) previousLayer = self.input for layer in layerIterable: if layer > 0: if self.parameters.DROPOUT: previousLayer = Dropout( self.parameters.DROPOUT)(previousLayer) previousLayer = Dense( layer, activation=self.activationFuncHidden, bias_initializer=initializer, kernel_initializer=initializer, kernel_regularizer=regularizer, kernel_constraint=constraint)(previousLayer) if self.parameters.ACTIVATION_FUNC_HIDDEN == "elu": previousLayer = (keras.layers.ELU( alpha=self.parameters.ELU_ALPHA))(previousLayer) if self.parameters.BATCHNORM: previousLayer = BatchNormalization()(previousLayer) if self.parameters.DROPOUT: previousLayer = Dropout(self.parameters.DROPOUT)(previousLayer) output = Dense(outputDim, activation=self.activationFuncOutput, bias_initializer=initializer, kernel_initializer=initializer, kernel_regularizer=regularizer, kernel_constraint=constraint)(previousLayer) self.valueNetwork = keras.models.Model(inputs=self.input, outputs=output) elif self.parameters.NEURON_TYPE == "LSTM": # Hidden Layer 1 # TODO: Use CNN with LSTM # if self.parameters.CNN_REPR: # hidden1 = LSTM(self.hiddenLayer1, return_sequences=True, stateful=stateful_training, batch_size=self.batch_len) # else: # hidden1 = LSTM(self.hiddenLayer1, input_shape=input_shape_lstm, return_sequences = True, # stateful= stateful_training, batch_size=self.batch_len) hidden1 = LSTM(self.hiddenLayer1, input_shape=input_shape_lstm, return_sequences=True, stateful=stateful_training, batch_size=self.batch_len, bias_initializer=initializer, kernel_initializer=initializer)(self.valueNetwork) # Hidden 2 if self.hiddenLayer2 > 0: hidden2 = LSTM(self.hiddenLayer2, return_sequences=True, stateful=stateful_training, batch_size=self.batch_len, bias_initializer=initializer, kernel_initializer=initializer)( self.valueNetwork) # Hidden 3 if self.hiddenLayer3 > 0: hidden3 = LSTM(self.hiddenLayer3, return_sequences=True, stateful=stateful_training, batch_size=self.batch_len, bias_initializer=initializer, kernel_initializer=initializer)( self.valueNetwork) # Output layer output = LSTM(outputDim, activation=self.activationFuncOutput, return_sequences=True, stateful=stateful_training, batch_size=self.batch_len, 
bias_initializer=initializer, kernel_initializer=initializer)(self.valueNetwork) self.valueNetwork = keras.models.Model(inputs=self.input, outputs=output) # Create target network self.valueNetwork._make_predict_function() self.targetNetwork = keras.models.clone_model(self.valueNetwork) self.targetNetwork.set_weights(self.valueNetwork.get_weights()) if self.parameters.OPTIMIZER == "Adam": if self.parameters.GRADIENT_CLIP_NORM: optimizer = keras.optimizers.Adam( lr=self.learningRate, clipnorm=self.parameters.GRADIENT_CLIP_NORM, amsgrad=self.parameters.AMSGRAD) elif self.parameters.GRADIENT_CLIP: optimizer = keras.optimizers.Adam( lr=self.learningRate, clipvalue=self.parameters.GRADIENT_CLIP, amsgrad=self.parameters.AMSGRAD) else: optimizer = keras.optimizers.Adam( lr=self.learningRate, amsgrad=self.parameters.AMSGRAD) elif self.parameters.OPTIMIZER == "Nadam": optimizer = keras.optimizers.Nadam(lr=self.learningRate) elif self.parameters.OPTIMIZER == "Adamax": optimizer = keras.optimizers.Adamax(lr=self.learningRate) elif self.parameters.OPTIMIZER == "SGD": if self.parameters.NESTEROV: optimizer = keras.optimizers.SGD( lr=self.learningRate, momentum=self.parameters.NESTEROV, nesterov=True) else: optimizer = keras.optimizers.SGD(lr=self.learningRate) self.optimizer = optimizer self.valueNetwork.compile(loss='mse', optimizer=optimizer) self.targetNetwork.compile(loss='mse', optimizer=optimizer) self.model = self.valueNetwork if self.parameters.NEURON_TYPE == "LSTM": # We predict using only one state input_shape_lstm = (1, self.stateReprLen) self.actionNetwork = Sequential() hidden1 = LSTM(self.hiddenLayer1, input_shape=input_shape_lstm, return_sequences=True, stateful=True, batch_size=1, bias_initializer=initializer, kernel_initializer=initializer) self.actionNetwork.add(hidden1) if self.hiddenLayer2 > 0: hidden2 = LSTM(self.hiddenLayer2, return_sequences=True, stateful=True, batch_size=self.batch_len, bias_initializer=initializer, kernel_initializer=initializer) self.actionNetwork.add(hidden2) if self.hiddenLayer3 > 0: hidden3 = LSTM(self.hiddenLayer3, return_sequences=True, stateful=True, batch_size=self.batch_len, bias_initializer=initializer, kernel_initializer=initializer) self.actionNetwork.add(hidden3) self.actionNetwork.add( LSTM(self.num_actions, activation=self.activationFuncOutput, return_sequences=False, stateful=True, batch_size=self.batch_len, bias_initializer=initializer, kernel_initializer=initializer)) self.actionNetwork.compile(loss='mse', optimizer=optimizer) # if __debug__: print(self.valueNetwork.summary()) # print("\n") if modelName is not None: self.load(modelName) self.targetNetwork._make_predict_function() sess = tf.Session() sess.run(tf.global_variables_initializer()) self.graph = tf.get_default_graph() # Necessary for multiprocessing to warm up the network def dummy_prediction(self): if self.parameters.CNN_REPR: input_shape = ([ self.parameters.NUM_OF_GRIDS, self.stateReprLen, self.stateReprLen ]) else: input_shape = (self.stateReprLen, ) dummy_input = numpy.zeros(input_shape) dummy_input = numpy.array([dummy_input]) self.predict(dummy_input) def reset_general(self, model): session = K.get_session() for layer in model.layers: for v in layer.__dict__: v_arg = getattr(layer, v) if hasattr(v_arg, 'initializer'): initializer_method = getattr(v_arg, 'initializer') initializer_method.run(session=session) print('reinitializing layer {}.{}'.format(layer.name, v)) def reset_weights(self): self.reset_general(self.valueNetwork) self.reset_general(self.targetNetwork) def 
reset_hidden_states(self): self.actionNetwork.reset_states() self.valueNetwork.reset_states() self.targetNetwork.reset_states() def load(self, modelName): path = modelName + "model.h5" self.valueNetwork = keras.models.load_model(path) self.targetNetwork = load_model(path) def setWeights(self, weights): self.valueNetwork.set_weights(weights) def trainOnBatch(self, inputs, targets, importance_weights): if self.parameters.NEURON_TYPE == "LSTM": if self.parameters.EXP_REPLAY_ENABLED: if self.parameters.PRIORITIZED_EXP_REPLAY_ENABLED: return self.valueNetwork.train_on_batch( inputs, targets, sample_weight=importance_weights) else: return self.valueNetwork.train_on_batch(inputs, targets) else: return self.valueNetwork.train_on_batch( numpy.array([numpy.array([inputs])]), numpy.array([numpy.array([targets])])) else: if self.parameters.PRIORITIZED_EXP_REPLAY_ENABLED: return self.valueNetwork.train_on_batch( inputs, targets, sample_weight=importance_weights) else: return self.valueNetwork.train_on_batch(inputs, targets) def updateActionNetwork(self): self.actionNetwork.set_weights(self.valueNetwork.get_weights()) def updateTargetNetwork(self): self.targetNetwork.set_weights(self.valueNetwork.get_weights()) if __debug__: print("Target Network updated.") def predict(self, state, batch_len=1): if self.parameters.NEURON_TYPE == "LSTM": if self.parameters.EXP_REPLAY_ENABLED: return self.valueNetwork.predict(state, batch_size=batch_len) else: return self.valueNetwork.predict( numpy.array([numpy.array([state])]))[0][0] if self.parameters.CNN_REPR: state = numpy.array([state]) with self.graph.as_default(): prediction = self.valueNetwork.predict(state)[0] return prediction def predictTargetQValues(self, state): if self.parameters.USE_ACTION_AS_INPUT: return [ self.predict_target_network( numpy.array([numpy.concatenate((state[0], act))]))[0] for act in self.actions ] else: return self.predict_target_network(state) def predict_target_network(self, state, batch_len=1): if self.parameters.NEURON_TYPE == "LSTM": if self.parameters.EXP_REPLAY_ENABLED: return self.targetNetwork.predict(state, batch_size=batch_len) else: return self.targetNetwork.predict( numpy.array([numpy.array([state])]))[0][0] if self.parameters.CNN_REPR: state = numpy.array([state]) return self.targetNetwork.predict(state)[0] else: return self.targetNetwork.predict(state)[0] def predict_action_network(self, trace): return self.actionNetwork.predict(numpy.array([numpy.array([trace]) ]))[0] def predict_action(self, state): if self.parameters.USE_ACTION_AS_INPUT: return [ self.predict(numpy.array([numpy.concatenate( (state[0], act))]))[0] for act in self.actions ] else: if self.parameters.NEURON_TYPE == "MLP": return self.predict(state) else: return self.predict_action_network(state) def saveModel(self, path, name=""): if not os.path.exists(path + "models/"): os.mkdir(path + "models/") self.targetNetwork.set_weights(self.valueNetwork.get_weights()) complete = False while not complete: try: self.targetNetwork.save(path + "models/" + name + "model.h5") complete = True except Exception: print("Error saving network. 
########################") complete = False print("Trying to save again...") def setEpsilon(self, val): self.epsilon = val def setFrameSkipRate(self, value): self.frameSkipRate = value def getParameters(self): return self.parameters def getNumOfActions(self): return self.num_actions def getEpsilon(self): return self.epsilon def getDiscount(self): return self.discount def getFrameSkipRate(self): return self.frameSkipRate def getGridSquaresPerFov(self): return self.gridSquaresPerFov def getStateReprLen(self): return self.stateReprLen def getNumActions(self): return self.num_actions def getLearningRate(self): return self.learningRate def getActivationFuncHidden(self): return self.activationFuncHidden def getActivationFuncOutput(self): return self.activationFuncOutput def getOptimizer(self): return self.optimizer def getActions(self): return self.actions def getTargetNetwork(self): return self.targetNetwork def getValueNetwork(self): return self.valueNetwork
print("Train...") model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=5, validation_split=0.1, show_accuracy=True) score = model.evaluate(X_test, Y_test, batch_size=batch_size) print('Test score:', score) classes = model.predict_classes(X_test, batch_size=batch_size) acc = np_utils.accuracy(classes, Y_test) print('Test accuracy:', acc) store_weights = {} for layer in model.layers : store_weights[layer] = layer.get_weights() # create a new model of same structure minus last layers, to explore intermediate outputs print('Build truncated model') chopped_model = Sequential() chopped_model.add(Embedding(max_features, 256)) chopped_model.add(LSTM(256, 128)) chopped_model.add(Dense(128, nb_classes)) chopped_model.set_weights(model.get_weights()) chopped_model.compile(loss='categorical_crossentropy', optimizer='adam', class_mode="categorical") # pickle intermediate activations, model weights, accuracy train_activations = chopped_model.predict(X_train, batch_size=batch_size) test_activations = chopped_model.predict(X_test, batch_size=batch_size) outputs = dict(final=classes, train_activations=train_activations, test_activations=test_activations, acc=acc) pkl.dump(outputs, open('results/predicted_activations_categories.pkl', 'wb'), protocol=pkl.HIGHEST_PROTOCOL)
wfv_window = 8
latest_news_date = y[:, 0].max()
print("Walk-forward validation starts: ")
print("\tWindow = {}".format(wfv_window))
print("\tStart Date = {}".format(hist[-1][0]))
print("\tTotal folds = {}".format(len(hist) // wfv_window))

for i in range(1, len(hist) // wfv_window + 1):
    split_date = int(latest_news_date - i * wfv_window)
    # Extract training set: strictly before the split date
    train_idx = np.where(y[:, 0] < split_date)
    X_train, y_train = X[train_idx], y[train_idx, 1:].reshape(-1, 7)
    # Reinitialize weights so every fold starts from the same point
    model.set_weights(init_weight)
    # Fit model
    model.fit(X_train, y_train, epochs=3, callbacks=[], batch_size=64)
    # Define validation metrics
    scores, pred, loss = np.empty((7)), np.empty((7, 7)), np.empty((7, 7))
    correct_trend = np.empty((7, 7), dtype=np.int8)
    for d in range(0, 7):
        val_idx = np.where(y[:, 0] == split_date + d)
        X_val, y_val = X[val_idx], y[val_idx, 1:].reshape(-1, 7)
        # Calculate validation score
        scores[d] = model.evaluate(X_val, y_val, verbose=0)
        # Predict future prices in a week
        pred[d] = model.predict(X_val).mean(axis=0)
        # Calculate errors in prediction
class Network(object): def __init__(self, parameters, modelName=None): self.parameters = parameters if parameters.SQUARE_ACTIONS: self.actions = createDiscreteActionsSquare( self.parameters.NUM_ACTIONS, self.parameters.ENABLE_SPLIT, self.parameters.ENABLE_EJECT) else: self.actions = createDiscreteActionsCircle( self.parameters.NUM_ACTIONS, self.parameters.ENABLE_SPLIT, self.parameters.ENABLE_EJECT) self.num_actions = len(self.actions) self.loadedModelName = None self.gpus = self.parameters.GPUS # Q-learning self.discount = self.parameters.DISCOUNT self.epsilon = self.parameters.EPSILON self.frameSkipRate = self.parameters.FRAME_SKIP_RATE self.gridSquaresPerFov = self.parameters.GRID_SQUARES_PER_FOV # CNN if self.parameters.CNN_REPR: # (KernelSize, stride, filterNum) self.kernel_1 = self.parameters.CNN_L1 self.kernel_2 = self.parameters.CNN_L2 self.kernel_3 = self.parameters.CNN_L3 if self.parameters.CNN_USE_L1: self.stateReprLen = self.parameters.CNN_INPUT_DIM_1 elif self.parameters.CNN_USE_L2: self.stateReprLen = self.parameters.CNN_INPUT_DIM_2 else: self.stateReprLen = self.parameters.CNN_INPUT_DIM_3 else: self.stateReprLen = self.parameters.STATE_REPR_LEN # ANN self.learningRate = self.parameters.ALPHA self.optimizer = self.parameters.OPTIMIZER if self.parameters.ACTIVATION_FUNC_HIDDEN == "elu": self.activationFuncHidden = "linear" # keras.layers.ELU(alpha=eluAlpha) else: self.activationFuncHidden = self.parameters.ACTIVATION_FUNC_HIDDEN self.activationFuncLSTM = self.parameters.ACTIVATION_FUNC_LSTM self.activationFuncOutput = self.parameters.ACTIVATION_FUNC_OUTPUT self.layers = parameters.Q_LAYERS if self.parameters.USE_ACTION_AS_INPUT: inputDim = self.stateReprLen + 4 outputDim = 1 else: inputDim = self.stateReprLen outputDim = self.num_actions if self.parameters.EXP_REPLAY_ENABLED: input_shape_lstm = (self.parameters.MEMORY_TRACE_LEN, inputDim) stateful_training = False self.batch_len = self.parameters.MEMORY_BATCH_LEN else: input_shape_lstm = (1, inputDim) stateful_training = True self.batch_len = 1 if self.parameters.INITIALIZER == "glorot_uniform": initializer = keras.initializers.glorot_uniform() elif self.parameters.INITIALIZER == "glorot_normal": initializer = keras.initializers.glorot_normal() else: weight_initializer_range = math.sqrt( 6 / (self.stateReprLen + self.num_actions)) initializer = keras.initializers.RandomUniform( minval=-weight_initializer_range, maxval=weight_initializer_range, seed=None) # CNN if self.parameters.CNN_REPR: if self.parameters.CNN_P_REPR: if self.parameters.CNN_P_INCEPTION: self.input = Input(shape=(self.stateReprLen, self.stateReprLen, 3)) tower_1 = Conv2D(self.kernel_2[2], (1, 1), padding='same', activation='relu')(self.input) tower_1 = Conv2D(self.kernel_2[2], (3, 3), padding='same', activation='relu')(tower_1) tower_2 = Conv2D(self.kernel_2[2], (1, 1), padding='same', activation='relu')(self.input) tower_2 = Conv2D(self.kernel_2[2], (5, 5), padding='same', activation='relu')(tower_2) tower_3 = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(self.input) tower_3 = Conv2D(self.kernel_2[2], (1, 1), padding='same', activation='relu')(tower_3) self.valueNetwork = keras.layers.concatenate( [tower_1, tower_2, tower_3], axis=3) self.valueNetwork = keras.layers.Flatten()( self.valueNetwork) # DQN approach else: # RGB if self.parameters.CNN_P_RGB: channels = 3 # GrayScale else: channels = 1 if self.parameters.CNN_LAST_GRID: channels = channels * 2 if self.parameters.COORDCONV: channels += 2 self.input = Input(shape=(self.stateReprLen, 
self.stateReprLen, channels)) conv = self.input if self.parameters.CNN_USE_L1: conv = Conv2D(self.kernel_1[2], kernel_size=(self.kernel_1[0], self.kernel_1[0]), strides=(self.kernel_1[1], self.kernel_1[1]), activation='relu', data_format='channels_last')(conv) if self.parameters.CNN_USE_L2: conv = Conv2D(self.kernel_2[2], kernel_size=(self.kernel_2[0], self.kernel_2[0]), strides=(self.kernel_2[1], self.kernel_2[1]), activation='relu', data_format='channels_last')(conv) if self.parameters.CNN_USE_L3: conv = Conv2D(self.kernel_3[2], kernel_size=(self.kernel_3[0], self.kernel_3[0]), strides=(self.kernel_3[1], self.kernel_3[1]), activation='relu', data_format='channels_last')(conv) self.valueNetwork = Flatten()(conv) # Not pixel input else: if self.parameters.CNN_TOWER: tower = [] self.input = [] self.towerModel = [] for grid in range(self.parameters.NUM_OF_GRIDS): self.input.append( Input(shape=(1, self.stateReprLen, self.stateReprLen))) if self.parameters.CNN_USE_L1: tower.append( Conv2D(self.kernel_1[2], kernel_size=(self.kernel_1[0], self.kernel_1[0]), strides=(self.kernel_1[1], self.kernel_1[1]), activation='relu', data_format='channels_first')( self.input[grid])) if self.parameters.CNN_USE_L2: if self.parameters.CNN_USE_L1: tower[grid] = Conv2D( self.kernel_2[2], kernel_size=(self.kernel_2[0], self.kernel_2[0]), strides=(self.kernel_2[1], self.kernel_2[1]), activation='relu', data_format='channels_first')(tower[grid]) else: tower.append( Conv2D(self.kernel_2[2], kernel_size=(self.kernel_2[0], self.kernel_2[0]), strides=(self.kernel_2[1], self.kernel_2[1]), activation='relu', data_format='channels_first')( self.input[grid])) if self.parameters.CNN_USE_L3: if self.parameters.CNN_USE_L2: tower[grid] = Conv2D( self.kernel_3[2], kernel_size=(self.kernel_3[0], self.kernel_3[0]), strides=(self.kernel_3[1], self.kernel_3[1]), activation='relu', data_format='channels_first')(tower[grid]) else: tower.append( Conv2D(self.kernel_3[2], kernel_size=(self.kernel_3[0], self.kernel_3[0]), strides=(self.kernel_3[1], self.kernel_3[1]), activation='relu', data_format='channels_first')( self.input[grid])) tower[grid] = Flatten()(tower[grid]) self.valueNetwork = keras.layers.concatenate( [i for i in tower], axis=1) # Vision grid merging else: self.input = Input(shape=(self.parameters.NUM_OF_GRIDS, self.stateReprLen, self.stateReprLen)) conv = self.input if self.parameters.CNN_USE_L1: conv = Conv2D(self.kernel_1[2], kernel_size=(self.kernel_1[0], self.kernel_1[0]), strides=(self.kernel_1[1], self.kernel_1[1]), activation='relu', data_format='channels_first')(conv) if self.parameters.CNN_USE_L2: conv = Conv2D(self.kernel_2[2], kernel_size=(self.kernel_2[0], self.kernel_2[0]), strides=(self.kernel_2[1], self.kernel_2[1]), activation='relu', data_format='channels_first')(conv) if self.parameters.CNN_USE_L3: conv = Conv2D(self.kernel_3[2], kernel_size=(self.kernel_3[0], self.kernel_3[0]), strides=(self.kernel_3[1], self.kernel_3[1]), activation='relu', data_format='channels_first')(conv) self.valueNetwork = Flatten()(conv) # Fully connected layers if self.parameters.NEURON_TYPE == "MLP": layerIterable = iter(self.layers) regularizer = keras.regularizers.l2(self.parameters.Q_WEIGHT_DECAY) if self.parameters.DROPOUT: constraint = maxnorm(self.parameters.MAXNORM) else: constraint = None if parameters.CNN_REPR: previousLayer = self.input extraInputSize = self.parameters.EXTRA_INPUT if extraInputSize > 0: extraInput = Input(shape=(extraInputSize, )) self.input = [self.input, extraInput] denseInput = 
keras.layers.concatenate( [self.valueNetwork, extraInput]) previousLayer = Dense( next(layerIterable), activation=self.activationFuncHidden, bias_initializer=initializer, kernel_initializer=initializer, kernel_regularizer=regularizer)(denseInput) else: self.input = Input(shape=(inputDim, )) previousLayer = self.input for layer in layerIterable: if layer > 0: if self.parameters.DROPOUT: previousLayer = Dropout( self.parameters.DROPOUT)(previousLayer) previousLayer = Dense( layer, activation=self.activationFuncHidden, bias_initializer=initializer, kernel_initializer=initializer, kernel_regularizer=regularizer, kernel_constraint=constraint)(previousLayer) if self.parameters.ACTIVATION_FUNC_HIDDEN == "elu": previousLayer = (keras.layers.ELU( alpha=self.parameters.ELU_ALPHA))(previousLayer) if self.parameters.BATCHNORM: previousLayer = BatchNormalization()(previousLayer) if self.parameters.DROPOUT: previousLayer = Dropout(self.parameters.DROPOUT)(previousLayer) output = Dense(outputDim, activation=self.activationFuncOutput, bias_initializer=initializer, kernel_initializer=initializer, kernel_regularizer=regularizer, kernel_constraint=constraint)(previousLayer) self.valueNetwork = keras.models.Model(inputs=self.input, outputs=output) elif self.parameters.NEURON_TYPE == "LSTM": # Hidden Layer 1 # TODO: Use CNN with LSTM # if self.parameters.CNN_REPR: # hidden1 = LSTM(self.hiddenLayer1, return_sequences=True, stateful=stateful_training, batch_size=self.batch_len) # else: # hidden1 = LSTM(self.hiddenLayer1, input_shape=input_shape_lstm, return_sequences = True, # stateful= stateful_training, batch_size=self.batch_len) hidden1 = LSTM(self.hiddenLayer1, input_shape=input_shape_lstm, return_sequences=True, stateful=stateful_training, batch_size=self.batch_len, bias_initializer=initializer, kernel_initializer=initializer) self.valueNetwork.add(hidden1) # Hidden 2 if self.hiddenLayer2 > 0: hidden2 = LSTM(self.hiddenLayer2, return_sequences=True, stateful=stateful_training, batch_size=self.batch_len, bias_initializer=initializer, kernel_initializer=initializer) self.valueNetwork.add(hidden2) # Hidden 3 if self.hiddenLayer3 > 0: hidden3 = LSTM(self.hiddenLayer3, return_sequences=True, stateful=stateful_training, batch_size=self.batch_len, bias_initializer=initializer, kernel_initializer=initializer) self.valueNetwork.add(hidden3) # Output layer output = LSTM(outputDim, activation=self.activationFuncOutput, return_sequences=True, stateful=stateful_training, batch_size=self.batch_len, bias_initializer=initializer, kernel_initializer=initializer) self.valueNetwork.add(output) # Create target network self.targetNetwork = keras.models.clone_model(self.valueNetwork) self.targetNetwork.set_weights(self.valueNetwork.get_weights()) if self.parameters.OPTIMIZER == "Adam": if self.parameters.GRADIENT_CLIP_NORM: optimizer = keras.optimizers.Adam( lr=self.learningRate, clipnorm=self.parameters.GRADIENT_CLIP_NORM, amsgrad=self.parameters.AMSGRAD) elif self.parameters.GRADIENT_CLIP: optimizer = keras.optimizers.Adam( lr=self.learningRate, clipvalue=self.parameters.GRADIENT_CLIP, amsgrad=self.parameters.AMSGRAD) else: optimizer = keras.optimizers.Adam( lr=self.learningRate, amsgrad=self.parameters.AMSGRAD) elif self.parameters.OPTIMIZER == "Nadam": optimizer = keras.optimizers.Nadam(lr=self.learningRate) elif self.parameters.OPTIMIZER == "Adamax": optimizer = keras.optimizers.Adamax(lr=self.learningRate) elif self.parameters.OPTIMIZER == "SGD": if self.parameters.NESTEROV: optimizer = keras.optimizers.SGD( 
lr=self.learningRate, momentum=self.parameters.NESTEROV, nesterov=True) else: optimizer = keras.optimizers.SGD(lr=self.learningRate) self.optimizer = optimizer self.valueNetwork.compile(loss='mse', optimizer=optimizer) self.targetNetwork.compile(loss='mse', optimizer=optimizer) self.model = self.valueNetwork if self.parameters.NEURON_TYPE == "LSTM": # We predict using only one state input_shape_lstm = (1, self.stateReprLen) self.actionNetwork = Sequential() hidden1 = LSTM(self.hiddenLayer1, input_shape=input_shape_lstm, return_sequences=True, stateful=True, batch_size=1, bias_initializer=initializer, kernel_initializer=initializer) self.actionNetwork.add(hidden1) if self.hiddenLayer2 > 0: hidden2 = LSTM(self.hiddenLayer2, return_sequences=True, stateful=True, batch_size=self.batch_len, bias_initializer=initializer, kernel_initializer=initializer) self.actionNetwork.add(hidden2) if self.hiddenLayer3 > 0: hidden3 = LSTM(self.hiddenLayer3, return_sequences=True, stateful=True, batch_size=self.batch_len, bias_initializer=initializer, kernel_initializer=initializer) self.actionNetwork.add(hidden3) self.actionNetwork.add( LSTM(self.num_actions, activation=self.activationFuncOutput, return_sequences=False, stateful=True, batch_size=self.batch_len, bias_initializer=initializer, kernel_initializer=initializer)) self.actionNetwork.compile(loss='mse', optimizer=optimizer) print(self.valueNetwork.summary()) print("\n") if modelName is not None: self.load(modelName) def reset_general(self, model): session = K.get_session() for layer in model.layers: for v in layer.__dict__: v_arg = getattr(layer, v) if hasattr(v_arg, 'initializer'): initializer_method = getattr(v_arg, 'initializer') initializer_method.run(session=session) print('reinitializing layer {}.{}'.format(layer.name, v)) def reset_weights(self): self.reset_general(self.valueNetwork) self.reset_general(self.targetNetwork) def reset_hidden_states(self): self.actionNetwork.reset_states() self.valueNetwork.reset_states() self.targetNetwork.reset_states() def load(self, modelName): path = modelName self.loadedModelName = modelName self.valueNetwork = keras.models.load_model(path + "model.h5") self.targetNetwork = load_model(path + "model.h5") def trainOnBatch(self, inputs, targets, importance_weights): if self.parameters.NEURON_TYPE == "LSTM": if self.parameters.EXP_REPLAY_ENABLED: if self.parameters.PRIORITIZED_EXP_REPLAY_ENABLED: return self.valueNetwork.train_on_batch( inputs, targets, sample_weight=importance_weights) else: return self.valueNetwork.train_on_batch(inputs, targets) else: return self.valueNetwork.train_on_batch( numpy.array([numpy.array([inputs])]), numpy.array([numpy.array([targets])])) else: if self.parameters.PRIORITIZED_EXP_REPLAY_ENABLED: return self.valueNetwork.train_on_batch( inputs, targets, sample_weight=importance_weights) else: return self.valueNetwork.train_on_batch(inputs, targets) def updateActionNetwork(self): self.actionNetwork.set_weights(self.valueNetwork.get_weights()) def updateTargetNetwork(self): self.targetNetwork.set_weights(self.valueNetwork.get_weights()) def predict(self, state, batch_len=1): if self.parameters.NEURON_TYPE == "LSTM": if self.parameters.EXP_REPLAY_ENABLED: return self.valueNetwork.predict(state, batch_size=batch_len) else: return self.valueNetwork.predict( numpy.array([numpy.array([state])]))[0][0] if self.parameters.CNN_REPR: if self.parameters.CNN_TOWER: stateRepr = numpy.zeros( (len(state), 1, 1, len(state[0]), len(state[0]))) for gridIdx, grid in enumerate(state): 
stateRepr[gridIdx][0][0] = grid state = list(stateRepr) else: if len(state) == 2: grid = numpy.array([state[0]]) extra = numpy.array([state[1]]) state = [grid, extra] else: state = numpy.array([state]) return self.valueNetwork.predict(state)[0] def predictTargetQValues(self, state): if self.parameters.USE_ACTION_AS_INPUT: return [ self.predict_target_network( numpy.array([numpy.concatenate((state[0], act))]))[0] for act in self.actions ] else: return self.predict_target_network(state) def predict_target_network(self, state, batch_len=1): if self.parameters.NEURON_TYPE == "LSTM": if self.parameters.EXP_REPLAY_ENABLED: return self.targetNetwork.predict(state, batch_size=batch_len) else: return self.targetNetwork.predict( numpy.array([numpy.array([state])]))[0][0] if self.parameters.CNN_REPR: if self.parameters.CNN_TOWER: stateRepr = numpy.zeros( (len(state), 1, 1, len(state[0]), len(state[0]))) for gridIdx, grid in enumerate(state): stateRepr[gridIdx][0][0] = grid stateRepr = list(stateRepr) return self.targetNetwork.predict(stateRepr)[0] else: if len(state) == 2: grid = numpy.array([state[0]]) extra = numpy.array([state[1]]) state = [grid, extra] else: state = numpy.array([state]) return self.targetNetwork.predict(state)[0] else: return self.targetNetwork.predict(state)[0] def predict_action_network(self, trace): return self.actionNetwork.predict(numpy.array([numpy.array([trace]) ]))[0] def predict_action(self, state): if self.parameters.USE_ACTION_AS_INPUT: return [ self.predict(numpy.array([numpy.concatenate( (state[0], act))]))[0] for act in self.actions ] else: if self.parameters.NEURON_TYPE == "MLP": return self.predict(state) else: return self.predict_action_network(state) def saveModel(self, path, name=""): self.targetNetwork.set_weights(self.valueNetwork.get_weights()) self.targetNetwork.save(path + name + "model.h5") def setEpsilon(self, val): self.epsilon = val def setFrameSkipRate(self, value): self.frameSkipRate = value def getParameters(self): return self.parameters def getNumOfActions(self): return self.num_actions def getEpsilon(self): return self.epsilon def getDiscount(self): return self.discount def getFrameSkipRate(self): return self.frameSkipRate def getGridSquaresPerFov(self): return self.gridSquaresPerFov def getTargetNetworkMaxSteps(self): return self.targetNetworkMaxSteps def getStateReprLen(self): return self.stateReprLen def getHiddenLayer1(self): return self.hiddenLayer1 def getHiddenLayer2(self): return self.hiddenLayer2 def getHiddenLayer3(self): return self.hiddenLayer3 def getNumActions(self): return self.num_actions def getLearningRate(self): return self.learningRate def getActivationFuncHidden(self): return self.activationFuncHidden def getActivationFuncOutput(self): return self.activationFuncOutput def getOptimizer(self): return self.optimizer def getLoadedModelName(self): return self.loadedModelName def getActions(self): return self.actions def getTargetNetwork(self): return self.targetNetwork def getValueNetwork(self): return self.valueNetwork
# model.add(Dropout(0.2))
model.add(LGRU2(80, return_sequences=True,
                inner_activation='sigmoid', activation='tanh'))
# model.add(Dropout(0.2))
model.add(LGRU2(90, return_sequences=True,
                inner_activation='sigmoid', activation='tanh'))
model.add(TimeDistributedDense(outputsize))
model.add(Activation('softmax'))

opt = RMSprop(lr=learning_rate, rho=0.9, epsilon=1e-6, clipvalue=clipval)
model.compile(loss='categorical_crossentropy', optimizer=opt)

# Restore previously trained weights; gzip must be opened in binary mode
# for pickle.load
res = pickle.load(gzip.open(paramsfile, 'rb'))
W = res['weights']
model.set_weights(W)

print(' -- Text sampling ---')
temperatures = [0.7, 1]
generated = text_sampling_char(model, vocabs, temperatures, ns=400)
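
# text_sampling_char is defined elsewhere; a common way to implement the
# temperature reweighting it presumably applies to the softmax output is
# sketched below. This is an assumption about its internals, shown only to
# illustrate what the temperatures [0.7, 1] above control.
import numpy as np

def sample_with_temperature(probs, temperature=1.0):
    # Rescale a probability vector: low temperature sharpens the
    # distribution (near-greedy), high temperature flattens it (more random)
    logits = np.log(np.asarray(probs, dtype=np.float64) + 1e-12) / temperature
    exp = np.exp(logits - logits.max())
    p = exp / exp.sum()
    return np.random.choice(len(p), p=p)

# e.g. a sharpened draw from a 3-symbol vocabulary:
print(sample_with_temperature([0.1, 0.6, 0.3], temperature=0.7))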
cm_train = []
acc_train = []
j = 0
for percent in percent_jam_list:
    print("------- Jammed Resample - ", percent * 100, "% ------")
    X_train, X_test, y_train, y_test = pre.split_data(X, y, 0.2, random_seed)
    X_train, y_train = resample_smote(X_train, y_train, None, percent, None,
                                      random_seed)
    X_train, X_test = pre.standardize_data(X_train, X_test)

    y_train = y_train.reshape(-1, 1)
    y_test = y_test.reshape(-1, 1)
    y_train = onehotencoder.fit_transform(y_train).toarray()
    y_test = onehotencoder.fit_transform(y_test).toarray()

    # Restore the initial weights so every resample percentage trains from
    # the same starting point
    mlp_cls.set_weights(weigths)
    mlp_cls.fit(X_train, y_train, epochs=epochs, batch_size=10, verbose=2)

    y_predict = mlp_cls.predict(X_test)
    # Map the argmax over the one-hot outputs back to class labels 1..3
    y_pred_fin = np.argmax(y_predict, axis=1) + 1
    y_test = onehotencoder.inverse_transform(y_test)
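
# cm_train/acc_train above suggest a confusion matrix and accuracy are
# collected per resample percentage. A plausible continuation, sketched with
# scikit-learn (an assumption; the placeholder arrays stand in for y_test and
# y_pred_fin from the loop):
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score

y_true = np.array([1, 2, 3, 1, 2])   # placeholder labels
y_pred = np.array([1, 2, 2, 1, 3])   # placeholder predictions
cm = confusion_matrix(y_true, y_pred, labels=[1, 2, 3])
acc = accuracy_score(y_true, y_pred)
print(cm)
print(acc)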
def train(self, val_score):
    params = self.def_params.copy()
    np.random.seed(1)
    tf.set_random_seed(2)

    # Start grid search
    best_AUC = 0.5
    AUC_vals = []
    for tune in ParameterGrid(self.tuning_params):
        params.update(tune)
        callbacks = [EarlyStopping(monitor=val_score, min_delta=0.01,
                                   patience=params['iter_patience'],
                                   mode='max')]
        optimizer = eval('keras.optimizers.' + params['optimizer'])(
            lr=params['learning_rate'])

        model = Sequential()
        model.add(Conv3D(32, (3, 3, 3),
                         activation=params['hidden_activation'],
                         padding=params['padding'],
                         input_shape=(156, 192, 64, 1)))
        # model.add(BatchNormalization())
        model.add(MaxPooling3D(pool_size=(2, 2, 2)))
        model.add(Conv3D(64, (3, 3, 3),
                         activation=params['hidden_activation'],
                         padding=params['padding']))
        # model.add(BatchNormalization())
        model.add(MaxPooling3D(pool_size=(3, 3, 3)))
        model.add(Conv3D(128, (3, 3, 3),
                         activation=params['hidden_activation'],
                         padding=params['padding']))
        # model.add(BatchNormalization())
        model.add(MaxPooling3D(pool_size=(4, 4, 4)))
        model.add(Flatten())
        model.add(Dense(256, activation=params['hidden_activation']))
        # model.add(BatchNormalization())
        model.add(Dropout(0.5))
        model.add(Dense(1, activation=params['out_activation']))
        model.compile(loss=params['loss_func'], optimizer=optimizer,
                      metrics=[eval(val_score)])

        parallel_model = multi_gpu_model(model, params['number_of_gpus'])
        parallel_model.compile(loss=params['loss_func'], optimizer=optimizer,
                               metrics=[eval(val_score)])
        history = parallel_model.fit(X_tr, y_tr,
                                     callbacks=callbacks,
                                     validation_data=(X_val, y_val),
                                     epochs=params['epochs'],
                                     batch_size=params['batch_size'],
                                     verbose=0)
        # Copy the weights trained on the multi-GPU replica back into the
        # single-GPU template so the best model can be kept and saved
        model.set_weights(parallel_model.get_weights())

        AUC_val = history.history['val_' + val_score][-1]
        AUC_vals.append(AUC_val)
        if AUC_val > best_AUC:
            best_AUC = AUC_val
            self.best_model = model
            self.best_params = tune
    self.AUC_val = best_AUC
    return AUC_vals
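
# ParameterGrid, used by train above, expands a dict of candidate lists into
# every hyperparameter combination. A minimal standalone illustration (the
# parameter names here are examples, not the tuning_params of the class):
from sklearn.model_selection import ParameterGrid

tuning_params = {'learning_rate': [1e-3, 1e-4], 'batch_size': [4, 8]}
for tune in ParameterGrid(tuning_params):
    print(tune)  # e.g. {'batch_size': 4, 'learning_rate': 0.001}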
class DQNSolver:
    def __init__(self, observation_space, action_space):
        self.exploration_rate = EXPLORATION_MAX
        self.action_space = action_space
        self.memory = deque(maxlen=MEMORY_SIZE)
        self.tau = 0.05

        self.model = Sequential()
        self.model.add(Dense(24, input_dim=observation_space.shape[0],
                             activation="relu"))
        self.model.add(Dense(48, activation="relu"))
        self.model.add(Dense(24, activation="relu"))
        self.model.add(Dense(self.action_space.n))
        self.model.compile(loss="mean_squared_error",
                           optimizer=Adam(lr=LEARNING_RATE))

        self.target_model = Sequential()
        self.target_model.add(Dense(24, input_dim=observation_space.shape[0],
                                    activation="relu"))
        self.target_model.add(Dense(48, activation="relu"))
        self.target_model.add(Dense(24, activation="relu"))
        self.target_model.add(Dense(self.action_space.n))
        self.target_model.compile(loss="mean_squared_error",
                                  optimizer=Adam(lr=LEARNING_RATE))

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        self.exploration_rate *= EXPLORATION_DECAY
        self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate)
        if np.random.random() < self.exploration_rate:
            print("exploring")
            return self.action_space.sample()
        prediction = self.model.predict(state)[0]
        # print(prediction)
        return np.argmax(prediction)

    def experience_replay(self):
        if len(self.memory) < BATCH_SIZE:
            return
        batch = random.sample(self.memory, BATCH_SIZE)
        for state, action, reward, state_next, terminal in batch:
            target = self.target_model.predict(state)
            if terminal:
                target[0][action] = reward
            else:
                target[0][action] = reward + GAMMA * np.amax(
                    self.target_model.predict(state_next)[0])
            self.model.fit(state, target, epochs=1, verbose=0)
        # q_update = reward
        # if not terminal:
        #     q_update = (reward + GAMMA * np.amax(self.model.predict(state_next)[0]))
        # q_values = self.model.predict(state)
        # q_values[0][action] = q_update
        # self.model.fit(state, q_values, verbose=0)
        # self.exploration_rate *= EXPLORATION_DECAY
        # self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate)

    def target_train(self):
        # Soft (Polyak) target update: blend the online weights into the
        # target network with mixing rate self.tau so the target tracks the
        # online network slowly
        weights = self.model.get_weights()
        target_weights = self.target_model.get_weights()
        for i in range(len(target_weights)):
            target_weights[i] = (self.tau * weights[i] +
                                 (1 - self.tau) * target_weights[i])
        self.target_model.set_weights(target_weights)
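
# A minimal driver loop for DQNSolver, assuming an OpenAI Gym environment.
# CartPole-v1 and the 200-step cap are illustrative choices; the constants
# (EXPLORATION_MAX, BATCH_SIZE, etc.) are defined elsewhere in the original
# module, as is DQNSolver above.
import gym
import numpy as np

env = gym.make("CartPole-v1")
solver = DQNSolver(env.observation_space, env.action_space)
state = np.reshape(env.reset(), (1, env.observation_space.shape[0]))
for step in range(200):
    action = solver.act(state)
    next_state, reward, done, _ = env.step(action)
    next_state = np.reshape(next_state, (1, env.observation_space.shape[0]))
    solver.remember(state, action, reward, next_state, done)
    solver.experience_replay()
    state = next_state
    if done:
        solver.target_train()  # sync the target network at episode end
        break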
kernel_regularizer=regularizers.l1_l2(valr[0], valr[1]),
          bias_regularizer=regularizers.l1_l2(valr[0], valr[1])))
model.add(Dropout(drop))
model.add(Dense(outs, activation='tanh', use_bias=True,
                bias_initializer="zeros",
                kernel_regularizer=regularizers.l1_l2(valr[0], valr[1]),
                bias_regularizer=regularizers.l1_l2(valr[0], valr[1])))

sgd = optims.SGD(lr=clr, momentum=0.9, decay=0.00, nesterov=False)
model.compile(optimizer=sgd, loss='mean_squared_error',
              metrics=['accuracy'])

# Set Keras weights
model.set_weights(currwg)

# Fit Keras model (full batch: batch_size equals the training set size)
his = model.fit(dataset.train[0], dataset.train[1],
                batch_size=dataset.train[0].shape[0],
                epochs=epochs, shuffle=True)

plt.plot(his.history['loss'], label='keras loss', ls=":")
plt.title('keras comparison')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(loc='upper right', prop={'size': 18})
plt.show()
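
# The 'keras comparison' plot above draws only the Keras loss curve. If the
# hand-rolled implementation being compared records per-epoch losses (the
# name custom_losses below is hypothetical, and the values are placeholders),
# they can be overlaid on the same axes:
import matplotlib.pyplot as plt

keras_losses = [0.9, 0.5, 0.3, 0.2]      # placeholder values
custom_losses = [0.92, 0.52, 0.31, 0.21]  # placeholder values
plt.plot(keras_losses, label='keras loss', ls=':')
plt.plot(custom_losses, label='custom loss')
plt.title('keras comparison')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(loc='upper right')
plt.show()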
class LearningShorts(BaseSim):
    def __init__(self, modelDir, epoch, activations=True, featureDim=9,
                 outputDim=400, hdVideo=False):
        self.activations = activations
        self.hdVideo = hdVideo
        super(LearningShorts, self).__init__(maxSteps=1199, enableWind=False,
                                             pressurePointCount=outputDim,
                                             showForcemap=True, dualArm=True,
                                             isArm=False)
        # Preprocessing functions
        self.functions = [datapreprocess.gripperForce,
                          datapreprocess.gripperTorque,
                          datapreprocess.gripperVelocity]
        self.labelFunction = datapreprocess.rawPressuremap

        # Load learning algorithm architecture and model weights
        self.model = util.loadModel(modelDir, epoch)

        # Make our LSTM stateful for fast one-timestep predictions
        weights = self.model.get_weights()
        self.model = Sequential()
        self.model.add(LSTM(50, batch_input_shape=(1, 1, featureDim),
                            activation='tanh', return_sequences=True,
                            stateful=True))
        self.model.add(LSTM(50, activation='tanh', return_sequences=True,
                            stateful=True))
        self.model.add(LSTM(50, activation='tanh', return_sequences=True,
                            stateful=True))
        self.model.add(TimeDistributed(Dense(outputDim * 2,
                                             activation='linear')))
        self.model.compile(loss='mse', optimizer='rmsprop',
                           metrics=['accuracy'])
        self.model.set_weights(weights)

        # Prepare prediction state
        self.y = None
        self.predictions = None
        self.predActivations = None
        self.yActivations = None

    def initialize(self):
        self.initData()
        self.varySimulation()
        self.recordArmSpheres()

    def initData(self):
        totalSteps = 700
        self.data = {'recordedTimes': [[]], 'gripperForce': [[]],
                     'gripperTorque': [[]], 'gripperPos': [[]],
                     'forcemap': [[]], 'armSpheres': [[]]}
        self.prevData = None
        self.prevProgress = None
        self.startPos = None
        self.prevTime = 0
        self.prevPos = 0
        # Deterministic output
        self.random = np.random.RandomState(1000)
        self.colors = [plt.cm.jet(i) for i in np.linspace(0, 0.9, 100)]
        self.rotateFist = [0, 0, 1, -np.radians(45)]
        self.startTime = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        # Learning
        self.y = None
        self.predictions = None
        self.predActivations = None
        self.yActivations = None
        self.points = None

    def varySimulation(self):
        # Randomize arm location.
        # Horizontal (+ is towards gripper), vertical (+ is upwards towards
        # sky), sideways (+ is towards camera, arm's right side).
        # These simulations "should" all successfully enter the sleeve:
        # self.simulator.positionArm(self.random.uniform(-0.05, 0.05), self.random.uniform(-0.075, 0.0), self.random.uniform(-0.05, 0.05))
        # These simulations may miss the sleeve or get caught:
        # self.simulator.positionArm(self.random.uniform(-0.05, 0.05), self.random.uniform(-0.4, 0.0), self.random.uniform(-0.1, 0.1))
        self.simulator.positionArm(0.0, -0.0785, 0.0,
                                   rotateFist=self.rotateFist,
                                   rotateArm=[0, 0, 0, 0])
        velocityFactor = 2.0

        # For video recording
        self.camRotateX = 10.8
        self.camRotateY = -128.6
        self.camTranslateX = 0.150759166667
        self.camTranslateY = -0.152780833333
        self.camDistance = -10.68

        self.simulator.setVelocityFactor(velocityFactor)
        splines = []
        splineCount = int(round(4 * velocityFactor))
        side = 0  # self.random.randint(1, 2)
        splines.append([-0.7 * velocityFactor / splineCount +
                        self.random.uniform(-0.03, 0.03), -0.025,
                        (-1)**side * self.random.uniform(0, 0.05),
                        0, 1, 0, 0])
        splines.append([-0.7 * velocityFactor * 2 / splineCount +
                        self.random.uniform(-0.03, 0.03), -0.05,
                        (-1)**(1 + side) * self.random.uniform(0, 0.05),
                        0, 1, 0, 0])
        for i in range(3, splineCount + 1):
            # [tx, ty, tz, rw, rx, ry, rz]
            # [0.7 max length, 0 height, alternate sides of arm]
            splines.append([-0.7 * velocityFactor * i / splineCount +
                            self.random.uniform(-0.03, 0.03),
                            -0.1 + self.random.uniform(-0.02, 0.02),
                            (-1)**((i - 1) + side) *
                            self.random.uniform(0, 0.05),
                            0, 1, 0, 0])
        print(splines)
        self.simulator.initSpline(splines)

    def recordArmSpheres(self):
        # Record the location for each sphere of the arm.
        # Order of spheres: hand, wrist, elbow, shoulder
        spheres = self.simulator.getArmSpheres()
        self.data['armSpheres'][0] = [[s.x, s.y, s.z, s.w] for s in spheres]

    def performLearning(self):
        # Preprocess data
        X = np.concatenate([fun(self.data, prevData=self.prevData)
                            if fun == datapreprocess.gripperVelocity
                            else fun(self.data)
                            for fun in self.functions], axis=2)
        self.points, self.y = self.labelFunction(
            self.data['forcemap'], np.array(self.data['armSpheres']),
            knearest=5, isArm=False, retPoints=True,
            rotateFist=self.rotateFist, precomputedPoints=self.points)

        # Predict data
        self.predictions = self.model.predict(X, batch_size=1)
        if self.activations:
            # The output concatenates pressure magnitudes and activation
            # indicators; split it down the middle (integer division so the
            # slice index is an int)
            half = self.predictions.shape[-1] // 2
            self.predActivations = self.predictions[:, :, half:]
            self.predictions = self.predictions[:, :, :half]
            yHalf = self.y.shape[-1] // 2
            self.yActivations = self.y[:, :, yHalf:]
            self.y = self.y[:, :, :yHalf]

    def recordData(self):
        self.prevData = copy.deepcopy(self.data)
        # Get simulation time
        t = self.simulator.recorded_time[-1]
        self.prevTime = (self.data['recordedTimes'][0][0]
                         if self.data['recordedTimes'][0] else 0)
        self.data['recordedTimes'][0] = [t]
        # Get force data from gripper
        gripForce = self.simulator.rig_parts[1].recorded_forces[-1]
        self.data['gripperForce'][0] = [[gripForce.x, gripForce.y,
                                         gripForce.z]]
        # Get torque data from gripper
        gripTorque = self.simulator.rig_parts[1].recorded_torques[-1]
        self.data['gripperTorque'][0] = [[gripTorque.x, gripTorque.y,
                                          gripTorque.z]]
        # Gripper position
        gripPos = self.simulator.getGripperPos()
        self.prevPos = (self.data['gripperPos'][0][0]
                        if self.data['gripperPos'][0] else [0, 0, 0])
        self.data['gripperPos'][0] = [[gripPos.x, gripPos.y, gripPos.z]]
        # Get data of forces applied to the arm. Data is currently a list of
        # PxVec4; turn this into a list of lists
        pxForcemap = self.simulator.getForcemap()
        forcemap = []
        for vec4 in pxForcemap:
            forcemap.append([vec4.x, vec4.y, vec4.z, vec4.w])
        self.data['forcemap'][0] = [forcemap]
        # Perform learning on new data to estimate pressure distribution map
        if self.simStep > 1:
            self.performLearning()
        # if self.simStep % 4 == 0:
        #     self.performLearning()
        # else:
        #     self.predActivations[0, self.simStep] = self.predActivations[0, self.simStep - 1]
        #     self.predictions[0, self.simStep] = self.predictions[0, self.simStep - 1]
        #     self.yActivations[0, self.simStep] = self.yActivations[0, self.simStep - 1]
        #     self.y[0, self.simStep] = self.y[0, self.simStep - 1]

    def getPoints(self):
        if self.y is not None:
            # Set unactivated points to 0
            predicts = self.predictions[0, 0]
            predicts[self.predActivations[0, 0] < 0] = 0
            predicts[predicts < 0] = 0
            predicts = predicts[:, np.newaxis]
            # Limit magnitudes to range of [0, 1]
            predicts = np.min(np.concatenate(
                [predicts / 2.0, np.ones((len(predicts), 1))], axis=1),
                axis=1)
            return np.concatenate([self.points, predicts[:, np.newaxis]],
                                  axis=1)
        else:
            if self.points is None:
                return np.concatenate([[[0, 0, 0]],
                                       np.zeros((len([[0, 0, 0]]), 1))],
                                      axis=1)
            else:
                return np.concatenate([self.points,
                                       np.zeros((len(self.points), 1))],
                                      axis=1)

    def getTruePoints(self):
        if self.y is not None:
            y = self.y[0, 0][:, np.newaxis]
            # Limit magnitudes to range of [0, 1]
            y = np.min(np.concatenate([y / 2.0, np.ones((len(y), 1))],
                                      axis=1), axis=1)
            return np.concatenate([self.points, y[:, np.newaxis]], axis=1)
        else:
            if self.points is None:
                return np.concatenate([[[0, 0, 0]],
                                       np.zeros((len([[0, 0, 0]]), 1))],
                                      axis=1)
            else:
                return np.concatenate([self.points,
                                       np.zeros((len(self.points), 1))],
                                      axis=1)

    def renderDualObjects(self):
        if self.simStep > 1:
            pos = np.array(self.data['gripperPos'][0][0])
            # Velocity
            velocity = (pos - np.array(self.prevPos)) / (
                self.data['recordedTimes'][0][0] - self.prevTime)
            # Normalize
            factor = 0.1
            velocity = velocity / np.linalg.norm(velocity) * factor
            force = np.array(self.data['gripperForce'][0][0]) / np.linalg.norm(
                self.data['gripperForce'][0][0]) * factor
            torque = np.array(self.data['gripperTorque'][0][0]) / np.linalg.norm(
                self.data['gripperTorque'][0][0]) * factor
            self.createLine(pos, pos + velocity, 0.96, 0.27, 0.21, 0.9,
                            linewidth=4.0)
            self.createLine(pos, pos + force, 0.12, 0.59, 0.94, 0.9,
                            linewidth=4.0)
            self.createLine(pos, pos + torque, 1.0, 0.59, 0, 0.9,
                            linewidth=4.0)
        if self.hdVideo:
            # Save screenshots for video
            if self.simStep >= 699:
                exit()
            if self.simStep != 0:
                self.saveScreenshot('movieLeg_' + self.startTime)

    def renderObjects(self):
        pass
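
# The __init__ above rebuilds the trained LSTM with stateful=True and
# batch_input_shape=(1, 1, featureDim) so it can be fed one timestep at a
# time; the weight transfer works because statefulness changes no weight
# shapes. A minimal standalone sketch of that conversion (toy dimensions
# assumed, not the 50-unit layers above):
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, TimeDistributed

feature_dim, output_dim = 9, 4

# Stateless training-time model (arbitrary sequence length per batch)
trained = Sequential()
trained.add(LSTM(8, input_shape=(None, feature_dim), return_sequences=True))
trained.add(TimeDistributed(Dense(output_dim)))
trained.compile(loss='mse', optimizer='rmsprop')

# Stateful inference-time clone: identical layer shapes, fixed batch of 1
stateful = Sequential()
stateful.add(LSTM(8, batch_input_shape=(1, 1, feature_dim),
                  return_sequences=True, stateful=True))
stateful.add(TimeDistributed(Dense(output_dim)))
stateful.compile(loss='mse', optimizer='rmsprop')
stateful.set_weights(trained.get_weights())

# One-timestep-at-a-time prediction; hidden state carries over between calls
step = np.random.random((1, 1, feature_dim))
print(stateful.predict(step, batch_size=1).shape)  # (1, 1, 4)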