def modeling(self, l=[2121, 100, 50, 10, 1]):
    """
    Generate a model with the given number of layers.

    Previously a five-layer model was always generated; now the number of
    layers adapts to the length of l. If l = [2121, 1], the model reduces
    to linear regression.

    - l2_param is read from self rather than passed as an argument.
    """
    l2_param = self.l2_param
    model = Sequential()
    # Attach the L2 penalty on the first Dense layer directly; assigning to
    # model.regularizers (as this code previously did) is a no-op in Keras 2.
    model.add(Dense(l[1], input_shape=(l[0],),
                    kernel_regularizer=l2(l2_param)))
    for n_w_l in l[2:]:
        model.add(Activation('relu'))
        #model.add(Dropout(0.4))
        #model.add(Dense(n_w_l, W_regularizer=l2(.01)))
        model.add(Dense(n_w_l))
    return model
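# Illustrative aside, not part of the original API: a minimal standalone
# sketch of the same funnel construction, assuming Keras 2. The helper name
# `build_funnel` is hypothetical. The design point is that passing
# kernel_regularizer to Dense actually applies the L2 penalty, whereas the
# bare attribute assignment used in older revisions silently did nothing.
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.regularizers import l2


def build_funnel(sizes, l2_param=0.01):
    # sizes[0] is the input dimension; every further entry appends a Dense
    # layer, with ReLU activations between hidden layers.
    net = Sequential()
    net.add(Dense(sizes[1], input_shape=(sizes[0],),
                  kernel_regularizer=l2(l2_param)))
    for width in sizes[2:]:
        net.add(Activation('relu'))
        net.add(Dense(width))
    return net

# Usage sketch: build_funnel([2121, 100, 50, 10, 1]) mirrors the default
# above, and build_funnel([2121, 1]) reduces to linear regression.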
def trainLayer(self,
               data=None,
               trgt=None,
               ifold=0,
               hidden_neurons=[400],
               layer=1,
               regularizer=None,
               regularizer_param=None):
    '''
    Perform the greedy layerwise algorithm to train the SAE.
    '''
    # Replace zero-width layers with a single neuron
    for i in range(len(hidden_neurons)):
        if hidden_neurons[i] == 0:
            hidden_neurons[i] = 1

    if (layer <= 0) or (layer > len(hidden_neurons)):
        print "[-] Error: the parameter layer must be greater than zero and less than or equal to the length of hidden_neurons"
        return -1

    if self.trn_params.params['verbose']:
        print '[+] Using %s as optimizer algorithm' % self.trn_params.params[
            'optmizerAlgorithm']

    neurons_str = self.getNeuronsString(data, hidden_neurons[:layer])
    if regularizer is not None and len(regularizer) != 0:
        model_str = '%s/%s/%s_%i_folds_%s_%s_neurons_%s_regularizer(%f)' % (
            self.save_path, self.analysis_str, self.prefix_str,
            self.n_folds, self.params_str, neurons_str, regularizer,
            regularizer_param)
    else:
        model_str = '%s/%s/%s_%i_folds_%s_%s_neurons' % (
            self.save_path, self.analysis_str, self.prefix_str,
            self.n_folds, self.params_str, neurons_str)

    # If this layer was already trained, load and return it
    if not self.development_flag:
        file_name = '%s_fold_%i_model.h5' % (model_str, ifold)
        if os.path.exists(file_name):
            if self.trn_params.params['verbose']:
                print 'File %s exists' % (file_name)
            # load model
            classifier = load_model(
                file_name,
                custom_objects={
                    '%s' % self.trn_params.params['loss']: self.lossFunction
                })
            file_name = '%s_fold_%i_trn_desc.jbl' % (model_str, ifold)
            trn_desc = joblib.load(file_name)
            return ifold, classifier, trn_desc
    else:
        file_name = '%s_fold_%i_model_dev.h5' % (model_str, ifold)
        if os.path.exists(file_name):
            if self.trn_params.params['verbose']:
                print 'File %s exists' % (file_name)
            # load model
            classifier = load_model(
                file_name,
                custom_objects={
                    '%s' % self.trn_params.params['loss']: self.lossFunction
                })
            file_name = '%s_fold_%i_trn_desc_dev.jbl' % (model_str, ifold)
            trn_desc = joblib.load(file_name)
            return ifold, classifier, trn_desc

    norm_data = self.normalizeData(data, ifold)

    best_init = 0
    best_loss = 10e6
    classifier = []
    trn_desc = {}

    for i_init in range(self.n_inits):
        model = Sequential()
        proj_all_data = norm_data
        if layer == 1:
            model.add(
                Dense(units=hidden_neurons[layer - 1],
                      input_dim=data.shape[1],
                      kernel_initializer="uniform"))
            model.add(
                Activation(self.trn_params.params['hidden_activation']))
            model.add(
                Dense(units=data.shape[1],
                      input_dim=hidden_neurons[layer - 1],
                      kernel_initializer="uniform"))
            model.add(
                Activation(self.trn_params.params['output_activation']))
        elif layer > 1:
            for ilayer in range(1, layer):
                neurons_str = self.getNeuronsString(
                    data, hidden_neurons[:ilayer])
                if regularizer is not None and len(regularizer) != 0:
                    previous_model_str = '%s/%s/%s_%i_folds_%s_%s_neurons_%s_regularizer(%f)' % (
                        self.save_path, self.analysis_str, self.prefix_str,
                        self.n_folds, self.params_str, neurons_str,
                        regularizer, regularizer_param)
                else:
                    previous_model_str = '%s/%s/%s_%i_folds_%s_%s_neurons' % (
                        self.save_path, self.analysis_str, self.prefix_str,
                        self.n_folds, self.params_str, neurons_str)
                if not self.development_flag:
                    file_name = '%s_fold_%i_model.h5' % (
                        previous_model_str, ifold)
                else:
                    file_name = '%s_fold_%i_model_dev.h5' % (
                        previous_model_str, ifold)

                # Train the previous layer first if it has not been trained
                if not os.path.exists(file_name):
                    self.trainLayer(data=data,
                                    trgt=trgt,
                                    ifold=ifold,
                                    hidden_neurons=hidden_neurons[:ilayer],
                                    layer=ilayer,
                                    regularizer=regularizer,
                                    regularizer_param=regularizer_param)

                layer_model = load_model(
                    file_name,
                    custom_objects={
                        '%s' % self.trn_params.params['loss']:
                        self.lossFunction
                    })
                get_layer_output = K.function(
                    [layer_model.layers[0].input],
                    [layer_model.layers[1].output])
                # Project the data through the trained encoder
                proj_all_data = get_layer_output([proj_all_data])[0]

            model.add(
                Dense(units=hidden_neurons[layer - 1],
                      input_dim=proj_all_data.shape[1],
                      kernel_initializer="uniform"))
            model.add(
                Activation(self.trn_params.params['hidden_activation']))
            if regularizer == "dropout":
                model.add(Dropout(regularizer_param))
            elif regularizer == "l1":
                # NOTE: assigning to model.regularizers has no effect in
                # Keras 2; kernel_regularizer on the Dense layer would.
                model.regularizers = [l1(regularizer_param)]
            elif regularizer == "l2":
                model.regularizers = [l2(regularizer_param)]
            model.add(
                Dense(units=proj_all_data.shape[1],
                      input_dim=hidden_neurons[layer - 1],
                      kernel_initializer="uniform"))
            model.add(
                Activation(self.trn_params.params['output_activation']))
            norm_data = proj_all_data
        # end of elif layer > 1

        print 'Layer: %i - Neuron: %i - Fold %i of %i Folds - Init %i of %i Inits' % (
            layer, hidden_neurons[layer - 1], ifold + 1, self.n_folds,
            i_init + 1, self.n_inits)
        model.compile(loss=self.lossFunction,
                      optimizer=self.optmizer,
                      metrics=self.trn_params.params['metrics'])
        # Train model
        earlyStopping = callbacks.EarlyStopping(
            monitor='val_loss',
            patience=self.trn_params.params['patience'],
            verbose=self.trn_params.params['train_verbose'],
            mode='auto')
        init_trn_desc = model.fit(
            norm_data,
            norm_data,
            epochs=self.trn_params.params['n_epochs'],
            batch_size=self.trn_params.params['batch_size'],
            callbacks=[earlyStopping],
            verbose=self.trn_params.params['verbose'],
            validation_split=0.15,
            shuffle=True)
        # Keep the best of the random initializations
        if np.min(init_trn_desc.history['val_loss']) < best_loss:
            best_init = i_init
            best_loss = np.min(init_trn_desc.history['val_loss'])
            classifier = model
            trn_desc['epochs'] = init_trn_desc.epoch
            for imetric in range(len(self.trn_params.params['metrics'])):
                # Keras stores the accuracy history under the key 'acc'
                if self.trn_params.params['metrics'][imetric] == 'accuracy':
                    metric = 'acc'
                else:
                    metric = self.trn_params.params['metrics'][imetric]
                trn_desc[metric] = init_trn_desc.history[metric]
                trn_desc['val_' + metric] = init_trn_desc.history['val_' +
                                                                  metric]
            trn_desc['loss'] = init_trn_desc.history['loss']
            trn_desc['val_loss'] = init_trn_desc.history['val_loss']

    # save model
    if not self.development_flag:
        file_name = '%s_fold_%i_model.h5' % (model_str, ifold)
        classifier.save(file_name)
        file_name = '%s_fold_%i_trn_desc.jbl' % (model_str, ifold)
        joblib.dump([trn_desc], file_name, compress=9)
    else:
        file_name = '%s_fold_%i_model_dev.h5' % (model_str, ifold)
        classifier.save(file_name)
        file_name = '%s_fold_%i_trn_desc_dev.jbl' % (model_str, ifold)
        joblib.dump([trn_desc], file_name, compress=9)

    return ifold, classifier, trn_desc
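# Illustrative aside, not part of the original API: the greedy layerwise
# scheme above trains one autoencoder per layer and feeds each new layer
# the previous encoder's output. Below is a minimal sketch of that
# projection step, assuming Keras 2 with a TensorFlow/Theano backend;
# `project_through_encoder` and `prev_ae` are hypothetical names, the
# latter standing in for a model returned by load_model() with the
# Dense -> Activation layout built in trainLayer().
from keras import backend as K


def project_through_encoder(prev_ae, samples):
    # layers[0] is the encoding Dense layer, layers[1] its activation;
    # evaluating the backend function yields the hidden representation.
    encode = K.function([prev_ae.layers[0].input],
                        [prev_ae.layers[1].output])
    return encode([samples])[0]

# Usage sketch: proj = project_through_encoder(layer_model, norm_data)
# reproduces the get_layer_output([proj_all_data])[0] call above.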
def trainLayer(self,
               data=None,
               trgt=None,
               transformed_data=None,
               ifold=0,
               hidden_neurons=[80],
               layer=1,
               folds_sweep=False,
               regularizer=None,
               regularizer_param=None,
               sort=999,
               etBinIdx=999,
               etaBinIdx=999,
               tuning_folder=None):
    # Replace zero-width layers with a single neuron
    for i in range(len(hidden_neurons)):
        if hidden_neurons[i] == 0:
            hidden_neurons[i] = 1

    if (layer <= 0) or (layer > len(hidden_neurons)):
        print "[-] Error: the parameter layer must be greater than zero and less than or equal to the length of hidden_neurons"
        return -1

    if self.trn_params.params['verbose']:
        print '[+] Using %s as optimizer algorithm' % self.trn_params.params[
            'optmizerAlgorithm']

    neurons_str = self.getNeuronsString(data, hidden_neurons[:layer])
    # Both branches of the former regularizer test built the same string,
    # so a single assignment suffices.
    model_str = '%s/%s/%s_%i_folds_%s_%s_neurons' % (
        self.save_path, self.analysis_str, self.prefix_str, self.n_folds,
        self.params_str, neurons_str)

    # If this layer was already trained, load and return it
    if not self.development_flag:
        file_name = '%s_sort_%i_etbin_%i_etabin_%i_layer_%i_model.h5' % (
            model_str, sort, etBinIdx, etaBinIdx, self._layerNumber)
        if os.path.exists(file_name):
            custom_obj = {}
            if self._aetype == 'contractive':
                from TuningTools.MetricsLosses import contractive_loss
                custom_obj['contractive_loss'] = contractive_loss(
                    hidden_neurons[layer - 1], data.shape[1],
                    self.trn_params.params['hidden_activation'],
                    self.trn_params.params['output_activation'])
            else:
                custom_obj[
                    self.trn_params.params['loss']] = self.lossFunction
            if self.trn_params.params['verbose']:
                print 'File %s exists' % (file_name)
            # load model
            classifier = load_model(file_name, custom_objects=custom_obj)
            file_name = '%s_sort_%i_etbin_%i_etabin_%i_layer_%i_trn_desc.jbl' % (
                model_str, sort, etBinIdx, etaBinIdx, self._layerNumber)
            trn_desc = joblib.load(file_name)
            file_name_prefix = '%s_sort_%i_etbin_%i_etabin_%i_layer_%i' % (
                model_str, sort, etBinIdx, etaBinIdx, self._layerNumber)
            with open(self.save_path + tuning_folder, 'a+') as t_file:
                t_file.write(file_name_prefix + "\n")
            return ifold, classifier, trn_desc
    else:
        file_name = '%s_sort_%i_etbin_%i_etabin_%i_model_dev.h5' % (
            model_str, sort, etBinIdx, etaBinIdx)
        if os.path.exists(file_name):
            if self.trn_params.params['verbose']:
                print 'File %s exists' % (file_name)
            # load model
            classifier = load_model(
                file_name,
                custom_objects={
                    '%s' % self.trn_params.params['loss']: self.lossFunction
                })
            file_name = '%s_sort_%i_etbin_%i_etabin_%i_trn_desc_dev.jbl' % (
                model_str, sort, etBinIdx, etaBinIdx)
            trn_desc = joblib.load(file_name)
            return ifold, classifier, trn_desc

    best_init = 0
    best_loss = 9999999
    classifier = []
    trn_desc = {}

    print 'Number of SAE training inits: ' + str(self.n_inits)
    for i_init in range(self.n_inits):
        print 'Layer: %i - Neuron: %i - Fold %i of %i Folds - Init %i of %i Inits' % (
            self._layerNumber, hidden_neurons[layer - 1], ifold + 1,
            self.n_folds, i_init + 1, self.n_inits)
        model = Sequential()
        proj_all_data = data
        if layer == 1:
            print 'LAYER 1'
            print hidden_neurons[layer - 1], data.shape[1]
            if regularizer == 'l1':
                model.add(
                    Dense(hidden_neurons[layer - 1],
                          input_dim=data.shape[1],
                          activity_regularizer=regularizers.l1(
                              regularizer_param),
                          name='encoded'))
            else:
                model.add(
                    Dense(hidden_neurons[layer - 1],
                          input_dim=data.shape[1],
                          name='encoded'))
            model.add(
                Activation(self.trn_params.params['hidden_activation']))
            if regularizer == "dropout":
                model.add(Dropout(regularizer_param))
            model.add(Dense(data.shape[1]))
            model.add(
                Activation(self.trn_params.params['output_activation']))
        elif layer > 1:
            for ilayer in range(1, layer):
                print hidden_neurons[:ilayer]
                neurons_str = self.getNeuronsString(
                    data, hidden_neurons[:ilayer])
                previous_model_str = '%s/%s/%s_%i_folds_%s_%s_neurons' % (
                    self.save_path, self.analysis_str, self.prefix_str,
                    self.n_folds, self.params_str, neurons_str)
                if not self.development_flag:
                    file_name = '%s_sort_%i_etbin_%i_etabin_%i_layer_%i_model.h5' % (
                        previous_model_str, sort, etBinIdx, etaBinIdx,
                        self._layerNumber)
                else:
                    file_name = '%s_sort_%i_etbin_%i_etabin_%i_model_dev.h5' % (
                        previous_model_str, sort, etBinIdx, etaBinIdx)

                # Check if previous layer model was trained
                if not os.path.exists(file_name):
                    self.trainLayer(data=data,
                                    trgt=trgt,
                                    transformed_data=transformed_data,
                                    ifold=ifold,
                                    hidden_neurons=hidden_neurons[:ilayer],
                                    layer=ilayer,
                                    folds_sweep=True)

                custom_obj = {}
                if self._aetype == 'contractive':
                    from TuningTools.MetricsLosses import contractive_loss
                    custom_obj['contractive_loss'] = contractive_loss(
                        hidden_neurons[layer - 1], data.shape[1],
                        self.trn_params.params['hidden_activation'],
                        self.trn_params.params['output_activation'])
                else:
                    custom_obj[
                        self.trn_params.params['loss']] = self.lossFunction
                layer_model = load_model(file_name,
                                         custom_objects=custom_obj)
                get_layer_output = K.function(
                    [layer_model.layers[0].input],
                    [layer_model.layers[1].output])
                # Projection of layer
                proj_all_data = get_layer_output([proj_all_data])[0]

            model.add(
                Dense(hidden_neurons[layer - 1],
                      input_dim=proj_all_data.shape[1],
                      name='encoded'))
            model.add(
                Activation(self.trn_params.params['hidden_activation']))
            if regularizer == "dropout":
                model.add(Dropout(regularizer_param))
            elif regularizer == "l1":
                # NOTE: assigning to model.regularizers has no effect in
                # Keras 2; activity_regularizer on the Dense layer (as in
                # the layer == 1 branch) is the supported mechanism.
                model.regularizers = [l1(regularizer_param)]
            elif regularizer == "l2":
                model.regularizers = [l2(regularizer_param)]
            model.add(Dense(proj_all_data.shape[1]))
            model.add(
                Activation(self.trn_params.params['output_activation']))
            data = proj_all_data
        # end of elif layer > 1

        if self._aetype == 'contractive':

            def contractive_loss(y_pred, y_true):
                lam = 1e-4
                mse = K.mean(K.square(y_true - y_pred), axis=1)
                W = K.variable(value=model.get_layer(
                    'encoded').get_weights()[0])  # N x N_hidden
                W = K.transpose(W)  # N_hidden x N
                h = model.get_layer('encoded').output
                dh = h * (1 - h)  # N_batch x N_hidden
                # N_batch x N_hidden * N_hidden x 1 = N_batch x 1
                contractive = lam * K.sum(
                    dh**2 * K.sum(W**2, axis=1), axis=1)
                return mse + contractive

            usedloss = contractive_loss
        else:
            usedloss = self.lossFunction

        model.compile(loss=usedloss,
                      optimizer=self.optmizer,
                      metrics=self.trn_params.params['metrics'])
        # Train model
        earlyStopping = callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,  #self.trn_params.params['patience'],
            verbose=self.trn_params.params['train_verbose'],
            mode='auto')
        ae_encoding_name = '%s_%i_folds_%s_%s_neurons' % (
            self.prefix_str, self.n_folds, self.params_str, neurons_str)
        ae_encoding = ae_encoding_name.split('_')
        ae_encoding_string = ae_encoding[0] + '_' + ae_encoding[
            24] + '_sort_%i_et_%i_eta_%i_layer_%i' % (
                sort, etBinIdx, etaBinIdx, self._layerNumber)
        tbCallBack = keras.callbacks.TensorBoard(
            log_dir='/home/caducovas/tensorboard/EncodingError/' +
            ae_encoding_string,
            histogram_freq=1,
            write_graph=True,
            write_images=True,
            write_grads=True,
            update_freq='batch')

        import time
        import datetime
        start_run = time.time()
        if self._aetype == 'discriminant':
            # NOTE: early stopping monitors val_loss, but this fit
            # provides no validation data.
            init_trn_desc = model.fit(
                data,
                transformed_data,
                epochs=300,  #self.trn_params.params['n_epochs'],
                batch_size=1024,  #self.trn_params.params['batch_size'],
                callbacks=[earlyStopping],  #, tbCallBack],
                verbose=1)
        else:
            init_trn_desc = model.fit(
                data,
                data,
                epochs=self.trn_params.params['n_epochs'],
                batch_size=1024,  #self.trn_params.params['batch_size'],
                callbacks=[earlyStopping],  #, tbCallBack],
                verbose=1,  #self.trn_params.params['verbose'],
                validation_data=(trgt, trgt))
        end_run = time.time()
        print 'Model took ' + str(
            datetime.timedelta(seconds=(end_run - start_run))) + ' to finish.'

        if self._aetype == 'discriminant':
            classifier = model
            trn_desc['epochs'] = init_trn_desc.epoch
            trn_desc['loss'] = init_trn_desc.history['loss']
        else:
            # Keep the best of the random initializations
            if np.min(init_trn_desc.history['val_loss']) < best_loss:
                best_init = i_init
                best_loss = np.min(init_trn_desc.history['val_loss'])
                classifier = model
                trn_desc['epochs'] = init_trn_desc.epoch
                trn_desc['loss'] = init_trn_desc.history['loss']
                trn_desc['val_loss'] = init_trn_desc.history['val_loss']
                trn_desc[
                    'kullback_leibler_divergence'] = init_trn_desc.history[
                        'kullback_leibler_divergence']
                trn_desc[
                    'val_kullback_leibler_divergence'] = init_trn_desc.history[
                        'val_kullback_leibler_divergence']

    model.summary()

    # save model
    if not self.development_flag:
        file_name = '%s_sort_%i_etbin_%i_etabin_%i_layer_%i_model.h5' % (
            model_str, sort, etBinIdx, etaBinIdx, self._layerNumber)
        classifier.save(file_name)
        file_name = '%s_sort_%i_etbin_%i_etabin_%i_layer_%i_trn_desc.jbl' % (
            model_str, sort, etBinIdx, etaBinIdx, self._layerNumber)
        joblib.dump([trn_desc], file_name, compress=9)
        file_name_prefix = '%s_sort_%i_etbin_%i_etabin_%i_layer_%i' % (
            model_str, sort, etBinIdx, etaBinIdx, self._layerNumber)
        with open(self.save_path + tuning_folder, 'a+') as t_file:
            t_file.write(file_name_prefix + "\n")
    else:
        file_name = '%s_sort_%i_etbin_%i_etabin_%i_model_dev.h5' % (
            model_str, sort, etBinIdx, etaBinIdx)
        classifier.save(file_name)
        file_name = '%s_sort_%i_etbin_%i_etabin_%i_trn_desc_dev.jbl' % (
            model_str, sort, etBinIdx, etaBinIdx)
        joblib.dump([trn_desc], file_name, compress=9)

    return ifold, classifier, trn_desc
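# Illustrative aside, not part of the original API: the inline
# contractive_loss closure above can be factored into a standalone builder.
# This sketch assumes Keras 2, a sigmoid-activated encoding layer named
# 'encoded' (so that h * (1 - h) is its elementwise derivative), and lam as
# the contraction weight; `make_contractive_loss` is a hypothetical name.
# As in the inline version, K.variable captures the encoder weights as they
# are at compile time.
from keras import backend as K


def make_contractive_loss(model, lam=1e-4):
    def contractive_loss(y_true, y_pred):
        # Reconstruction term
        mse = K.mean(K.square(y_true - y_pred), axis=1)
        # Frobenius norm of the encoder Jacobian for a sigmoid layer:
        # J = diag(h * (1 - h)) . W^T
        W = K.transpose(K.variable(
            model.get_layer('encoded').get_weights()[0]))  # N_hidden x N
        h = model.get_layer('encoded').output
        dh = h * (1 - h)  # N_batch x N_hidden
        penalty = lam * K.sum(dh ** 2 * K.sum(W ** 2, axis=1), axis=1)
        return mse + penalty
    return contractive_loss

# Usage sketch: model.compile(loss=make_contractive_loss(model), ...)
# matches the usedloss = contractive_loss branch above.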